# Rows that carry a treatment value.
has_treatment = df['treatment'].notna()
# Running count of treated rows: every treated row opens a new group, and
# the rows before the first treatment all fall into group 0.
group_id = has_treatment.cumsum()
# Score of each row relative to the first score in its group.
first_score = df.groupby(group_id)['score'].transform('first')
score_delta = df['score'] - first_score
# Blank out the treated rows themselves and everything before the first one.
blank = has_treatment | group_id.eq(0)
df['year diff'] = np.where(blank, np.nan, score_delta)
print(df)
year treatment score year diff
0 2010 NaN 1 NaN
1 2011 NaN 2 NaN
2 2012 NaN 3 NaN
3 2013 9.0 4 NaN
4 2014 NaN 5 1.0
5 2015 NaN 6 2.0
6 2016 NaN 7 3.0
7 2017 NaN 8 4.0
8 2018 NaN 9 5.0
9 2019 NaN 10 6.0
10 2020 10.0 11 NaN
11 2021 NaN 12 1.0
12 2022 NaN 13 2.0
13 2023 NaN 14 3.0
14 2024 NaN 15 4.0
15 2025 12.0 16 NaN
16 2026 NaN 17 1.0
17 2027 NaN 18 2.0
# Build df['Result']: for every row, the difference between its 'year' and the
# 'year' of the latest entry of mylist at or before that row; 0 for rows up to
# and including the first entry, or for all rows when mylist is empty.
# NOTE(review): mylist is defined outside this snippet -- presumably the row
# positions where 'treatment' is set; confirm against the caller.
# NOTE(review): index labels are also used as positions for .iloc, which only
# lines up when df has the default 0..n-1 index -- confirm.
years = df['year']  # read the column directly instead of building a row per lookup
first_marker = min(mylist, default=None)  # loop-invariant, so computed once
re_list = []
for index in df.index:
    if first_marker is not None and index > first_marker:
        # Latest marker that is not after the current row.
        anchor = max(i for i in mylist if i <= index)
        re_list.append(years.iloc[index] - years.iloc[anchor])
    else:
        # No earlier marker to measure from.
        re_list.append(0)
df['Result'] = re_list
IIUC,你可以用:
或者,如果你需要根据 treatment 列的值来计算 score 的差值:
用途:
如果要使用 `for` 循环:现在我们减去 `year` 值
相关问题 更多 >
编程相关推荐