如何通过对数据帧中的每一行应用函数来创建新列?

2024-06-10 01:06:42 发布

您现在位置:Python中文网/ 问答频道 /正文

我试图通过对每一行应用一个函数在我的数据框中创建一个新列(我使用来自https://pbpython.com/amortization-model.html的函数)。当我尝试使用随机数时,该函数工作正常,但当我应用于数据帧中的每一行时,我得到了这个错误(OutOfBoundsDatetime:Outofbounds nanosecond时间戳:2459-09-01 00:00:00)

#%%
import pandas as pd
from datetime import date
import numpy as np
from datetime import datetime
import math
import numpy_financial as npf
# Solo se leen en esta parte
interest_rate = 10
years         = 7
payments_year = 12
principal     = 100000

def amortization_table(interest_rate, years,payments_year, principal):
    addl_principal=0
    start_date=pd.to_datetime(date.today(), errors='coerce')
    #Calculate the amortization schedule given the loan details

    #Args:
    #   interest_rate: The annual interest rate for this loan
    #   years: Number of years for the loan
    #   payments_year: Number of payments in a year
    #   principal: Amount borrowed
    #   addl_principal (optional): Additional payments to be made each period. Assume 0 if nothing provided.
    # must be a value less then 0, the function will convert a positive value to
    #                             negative
    #  start_date (optional): Start date. Will start on first of next month if none provided

    #Returns:
    #    schedule: Amortization schedule as a pandas dataframe
    #    summary: Pandas dataframe that summarizes the payoff information

    # Ensure the additional payments are negative
    if addl_principal > 0:
        addl_principal = -addl_principal

    # Create an index of the payment dates
    rng = pd.date_range(start_date, periods=years * payments_year, freq='MS')
    rng.name = "Payment_Date"
    rng = pd.to_datetime(rng, errors='coerce')

    # Build up the Amortization schedule as a DataFrame
    df = pd.DataFrame(index=rng,columns=['Payment', 'Principal', 'Interest', 
                                            'Addl_Principal', 'Curr_Balance'], dtype='float')

    # Add index by period (start at 1 not 0)
    df.reset_index(inplace=True)
    df.index += 1
    df.index.name = "Period"

    # Calculate the payment, principal and interests amounts using built in Numpy functions
    per_payment = npf.pmt(interest_rate/payments_year, years*payments_year, principal)
    df["Payment"] = per_payment
    df["Principal"] = npf.ppmt(interest_rate/payments_year, df.index, years*payments_year, principal)
    df["Interest"] = npf.ipmt(interest_rate/payments_year, df.index, years*payments_year, principal)
    df["VP_Interest"] = npf.pv(rate=interest_rate/12, nper = df.index - 1, pmt=0, fv=df["Interest"])
        
    # Round the values
    df = df.round(2) 

    # Add in the additional principal payments
    df["Addl_Principal"] = addl_principal

    # Store the Cumulative Principal Payments and ensure it never gets larger than the original principal
    df["Cumulative_Principal"] = (df["Principal"] + df["Addl_Principal"]).cumsum()
    df["Cumulative_Principal"] = df["Cumulative_Principal"].clip(lower=-principal)

    # Calculate the current balance for each period
    df["Curr_Balance"] = principal + df["Cumulative_Principal"]

    # Determine the last payment date
    try:
        last_payment = df.query("Curr_Balance <= 0")["Curr_Balance"].idxmax(axis=1, skipna=True)
    except ValueError:
        last_payment = df.last_valid_index()

    last_payment_date = "{:%m-%d-%Y}".format(df.loc[last_payment, "Payment_Date"])
        
    # Truncate the data frame if we have additional principal payments:
    if addl_principal != 0:
                
        # Remove the extra payment periods
        df = df.loc[0:last_payment].copy()
        
        # Calculate the principal for the last row
        df.loc[last_payment, "Principal"] = -(df.loc[last_payment-1, "Curr_Balance"])
        
        # Calculate the total payment for the last row
        df.loc[last_payment, "Payment"] = df.loc[last_payment, ["Principal", "Interest"]].sum()
        
        # Zero out the additional principal
        df.loc[last_payment, "Addl_Principal"] = 0
        
    # Get the payment info into a DataFrame in column order
    payment_info = (df[["Payment", "Principal", "Addl_Principal", "Interest", "VP_Interest"]]
                    .sum().to_frame().T)
        
    # Format the Date DataFrame
    payment_details = pd.DataFrame.from_dict(dict([('payoff_date', [last_payment_date]),
                                                ('Interest Rate', [interest_rate]),
                                                ('Number of years', [years])
                                                ]))
    # Add a column showing how much we pay each period.
    # Combine addl principal with principal for total payment
    payment_details["Period_Payment"] = round(per_payment, 2) + addl_principal

    payment_summary = pd.concat([payment_details, payment_info], axis=1)
    cumInt = payment_summary.VP_Interest[0]*-1
    cumInt2 = payment_summary.Interest[0]*-1

    return cumInt
#%%
id_cl = pd.read_csv('C:/shiny_Dashboard/Anton/id_cl3.csv', sep = ',')
id_cl['yint'] = id_cl['disb'] * id_cl['intrate']/100 
id_cl['plazorestanteendias'] =  np.random.randint(360, 3600, id_cl.shape[0])
id_cl['termy'] = id_cl['plazorestanteendias']/360
id_cl['lenk'] =  12
print(amortization_table(2 ,3, 12,100000))
# %%
#New column
id_cl['VPInt'] = np.vectorize(amortization_table)(id_cl['intrate'], 
                            id_cl['plazorestanteendias'], id_cl['lenk'], id_cl['disb'])

我尝试了这个替代选项,但得到了相同的错误

id_cl['VPInt'] = id_cl.apply(lambda row : amortization_table(row['intrate'], 
                        row['plazorestanteendias'], row['lenk'], row['disb']), axis = 1)
print(id_cl)

以下是我的数据框架的一部分:

^{tb1}$
OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 3689-09-01 00:00:00

OutOfBoundsDatetime                       Traceback (most recent call last)
c:\shiny_Dashboard\Anton\functionTesting.py in <module>
      1 #def amortization_table(interest_rate, years,payments_year, principal):
----> 2 id_cl['VPInt'] = id_cl.apply(lambda row : amortization_table(row['intrate'], 
      3                             row['plazorestanteendias'], row['lenk'], row['disb']), axis = 1)
      4 print(id_cl)

C:\py\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
   7766             kwds=kwds,
   7767         )
-> 7768         return op.get_result()
   7769 
   7770     def applymap(self, func, na_action: Optional[str] = None) -> DataFrame:

C:\py\lib\site-packages\pandas\core\apply.py in get_result(self)
    183             return self.apply_raw()
    184 
--> 185         return self.apply_standard()
    186 
    187     def apply_empty_result(self):

C:\py\lib\site-packages\pandas\core\apply.py in apply_standard(self)
    274 
    275     def apply_standard(self):
--> 276         results, res_index = self.apply_series_generator()
    277 
    278         # wrap results

C:\py\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
    288             for i, v in enumerate(series_gen):
    289                 # ignore SettingWithCopy here in case the user mutates
--> 290                 results[i] = self.f(v)
    291                 if isinstance(results[i], ABCSeries):
    292                     # If we have a view on v, we need to make a copy because

c:\shiny_Dashboard\Anton\functionTesting.py in <lambda>(row)
      1 #def amortization_table(interest_rate, years,payments_year, principal):
----> 2 id_cl['VPInt'] = id_cl.apply(lambda row : amortization_table(row['intrate'], 
      3                             row['plazorestanteendias'], row['lenk'], row['disb']), axis = 1)
      4 print(id_cl)

c:\shiny_Dashboard\Anton\functionTesting.py in amortization_table(interest_rate, years, payments_year, principal)
     36 
     37     # Create an index of the payment dates
---> 38     rng = pd.date_range(start_date, periods=years * payments_year, freq='MS')
     39     rng.name = "Payment_Date"
     40     rng = pd.to_datetime(rng, errors='coerce')

C:\py\lib\site-packages\pandas\core\indexes\datetimes.py in date_range(start, end, periods, freq, tz, normalize, name, closed, **kwargs)
   1067         freq = "D"
   1068 
-> 1069     dtarr = DatetimeArray._generate_range(
   1070         start=start,
   1071         end=end,

C:\py\lib\site-packages\pandas\core\arrays\datetimes.py in _generate_range(cls, start, end, periods, freq, tz, normalize, ambiguous, nonexistent, closed)
    414             else:
    415                 xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
--> 416                 values = np.array([x.value for x in xdr], dtype=np.int64)
    417 
    418             _tz = start.tz if start is not None else end.tz

C:\py\lib\site-packages\pandas\core\arrays\datetimes.py in <listcomp>(.0)
    414             else:
    415                 xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
--> 416                 values = np.array([x.value for x in xdr], dtype=np.int64)
    417 
    418             _tz = start.tz if start is not None else end.tz

C:\py\lib\site-packages\pandas\core\arrays\datetimes.py in generate_range(start, end, periods, offset)
   2436 
   2437     if end is None:
-> 2438         end = start + (periods - 1) * offset
   2439 
   2440     if start is None:

pandas\_libs\tslibs\offsets.pyx in pandas._libs.tslibs.offsets.BaseOffset.__add__()

pandas\_libs\tslibs\offsets.pyx in pandas._libs.tslibs.offsets.BaseOffset.__add__()

pandas\_libs\tslibs\offsets.pyx in pandas._libs.tslibs.offsets.apply_wraps.wrapper()

pandas\_libs\tslibs\offsets.pyx in pandas._libs.tslibs.offsets.MonthOffset.apply()

pandas\_libs\tslibs\offsets.pyx in pandas._libs.tslibs.offsets.shift_month()

pandas\_libs\tslibs\timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.replace()

pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_datetime_to_tsobject()

pandas\_libs\tslibs\np_datetime.pyx in pandas._libs.tslibs.np_datetime.check_dts_bounds()

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 3689-09-01 00:00:00

Tags: theinpyidprincipalpandasdfcl
1条回答
网友
1楼 · 发布于 2024-06-10 01:06:42

在我看来,这个错误相当具有描述性。您正试图使用pd.date_range生成一个范围,该范围返回一个DatetimeIndexDatetimeIndex is limited in scope due to internal representation

因此,您可以放弃尝试推断3000多年的摊销率,或者使用另一种索引方法,如periodIndex=>span = pd.period_range('2021-09-01', '5000-01-01', freq = 'M')将生成从本月到5000年1月的月度指数

相关问题 更多 >