print (df)
trID event time frameID
0 tID1 d 2015-01-01 1
1 tID1 s 2015-01-02 1
2 tID1 d 2015-01-03 1
3 tID1 d 2015-01-04 2
4 tID1 s 2015-01-05 2
5 tID1 d 2015-01-06 1
6 tID1 s 2015-01-07 1
#create helper column for distinguish frameID with duplicates for unique groups
df['g'] = df['frameID'].ne(df['frameID'].shift()).cumsum()
#aggregate by 3 columns ang get difference
a = df.groupby(['trID','g','frameID'])['time'].agg(lambda x: x.max() - x.min())
print (a)
trID g frameID
tID1 1 1 2 days
2 2 1 days
3 1 1 days
Name: time, dtype: timedelta64[ns]
#sum by first and third level (trID, frameID) - get days
df1 = a.sum(level=[0,2]).dt.days.reset_index(name='SUM')
#similar get seconds if necessary
#df1 = a.sum(level=[0,2]).dt.total_seconds().reset_index(name='SUM')
print (df1)
trID frameID SUM
0 tID1 1 3
1 tID1 2 1
编辑:
print (df)
trID event time frameID
0 tID1 d 2015-01-01 2
1 tID1 s 2015-01-02 2
2 tID1 d 2015-01-03 1
3 tID1 d 2015-01-04 1
4 tID2 s 2015-01-05 1
5 tID2 d 2015-01-06 1
6 tID1 s 2015-01-07 1
7 tID1 s 2015-01-08 1
df['time'] = pd.to_datetime(df['time'])
#create 2 columns for groups
df[['g1','g2']] = df[['trID','frameID']].ne(df[['trID','frameID']].shift()).cumsum()
#add both new columns
a = df.groupby(['trID','g1','g2','frameID'])['time'].agg(lambda x: x.max() - x.min())
print (a)
trID g1 g2 frameID
tID1 1 1 2 1 days
2 1 1 days
3 2 1 1 days
tID2 2 2 1 1 days
Name: time, dtype: timedelta64[ns]
#sum by first and fourth level (trID, frameID)
df1 = a.sum(level=[0,3]).dt.days.reset_index(name='SUM')
print (df1)
trID frameID SUM
0 tID1 2 1
1 tID1 1 2
2 tID2 1 1
用途:
编辑:
相关问题 更多 >
编程相关推荐