从dataframe列获取以前的值

DocumentId  offset  feature
0           0       2000
0           7       2000
0           16      0
0           27      0
0           36      0
0           40      0
0           46      0
0           57      0
0           63      0
0           78      0
0           88      0
0           91      0
0           103     2200
1           109     0
1           113     2200
1           126     2200
1           131     2200
1           137     2200
1           142     0
1           152     0
1           157     200
1           159     200
1           161     200
1           167     0
1           170     200

offset1  feature  offset2  feature  offset3  feature
-        -        0        2000     7        2000
103      2200     91       0        88       0
137      2200     131      2200     126      2200
161      200      159      200      157      200

offset4  feature  offset5  feature  offset6  feature
103      2200     109      0        113      2200
113      2200     126      2200     131      2200
157      200      159      200      161      200

offset4  feature  offset5  feature  offset6  feature
103      2200     109      0        113      2200

Now, after this, if I go further down, then immediately at offset 113 the feature, whose previous value was 0, changes to 2200.

# Index labels of the rows selected by the RFC_PREDICTEDFEATURE == 0 mask.
zero_indexes = list(input_with[input_csv['RFC_PREDICTEDFEATURE'] == 0].index)

# DataFrame.append was removed in pandas 2.0, so collect the slices in a list
# and concatenate them once after the loop.
frames = []
for each_zero_index in zero_indexes:
    # First value of the label slice [index - 1, index], i.e. the row just
    # before the zero row when that label exists.
    value = input_with['feature'].loc[each_zero_index - 1: each_zero_index].values[0]
    if value != 0:
        # Keep the zero row together with the rows labelled up to 3 before it.
        df1 = input_with.loc[each_zero_index - 3: each_zero_index]
        frames.append(df1)

# pd.concat raises on an empty list, so fall back to an empty frame.
df2 = pd.concat(frames) if frames else pd.DataFrame()

1条回答

网友

1楼 · 发布于 2024-04-20 01:52:26

使用strides：

def rolling_window(a, window):
    """Build a sliding-window view over the last axis of ``a``.

    The result has the shape of ``a`` with its last axis replaced by two
    axes: the number of window positions (``a.shape[-1] - window + 1``) and
    ``window`` itself. The view is created with ``as_strided``, so the
    windows overlap in memory.

    Args:
        a: NumPy array to slide over.
        window: Number of consecutive elements in each window.

    Returns:
        A strided view of ``a`` whose trailing axis holds one window per row.
    """
    last_axis_len = a.shape[-1]
    # One row per window start position along the last axis.
    view_shape = a.shape[:-1] + (last_axis_len - window + 1, window)
    # Stepping to the next window and to the next element inside a window
    # both advance by one element, so the last stride is repeated.
    last_axis_step = a.strides[-1]
    view_strides = a.strides + (last_axis_step,)
    return np.lib.stride_tricks.as_strided(
        a, shape=view_shape, strides=view_strides
    )

# Window length: each output row holds the current value plus the n - 1
# values that precede it.
n = 3
# Left-pad with n - 1 NaNs so the first rows of the column also get a full window.
x = np.concatenate([[np.nan] * (n - 1), df['feature'].values])
# Reverse every window with [:, ::-1] so that column 0 is the current value
# and the following columns are progressively older values.
arr1 = rolling_window(x, n)[:, ::-1]

print (arr1)
[[2000.   nan   nan]
 [2000. 2000.   nan]
 [   0. 2000. 2000.]
 [   0.    0. 2000.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [   0.    0.    0.]
 [2200.    0.    0.]
 [   0. 2200.    0.]
 [2200.    0. 2200.]
 [2200. 2200.    0.]
 [2200. 2200. 2200.]
 [2200. 2200. 2200.]
 [   0. 2200. 2200.]
 [   0.    0. 2200.]
 [ 200.    0.    0.]
 [ 200.  200.    0.]
 [ 200.  200.  200.]
 [   0.  200.  200.]
 [ 200.    0.  200.]]

# Same NaN-padded, reversed windows as for 'feature', built from the 'offset'
# column so that both arrays can be filtered with one mask.
x = np.concatenate([[np.nan] * (n - 1), df['offset'].values])
arr2 = rolling_window(x, n)[:, ::-1]

print (arr2)
[[  0.  nan  nan]
 [  7.   0.  nan]
 [ 16.   7.   0.]
 [ 27.  16.   7.]
 [ 36.  27.  16.]
 [ 40.  36.  27.]
 [ 46.  40.  36.]
 [ 57.  46.  40.]
 [ 63.  57.  46.]
 [ 78.  63.  57.]
 [ 88.  78.  63.]
 [ 91.  88.  78.]
 [103.  91.  88.]
 [109. 103.  91.]
 [113. 109. 103.]
 [126. 113. 109.]
 [131. 126. 113.]
 [137. 131. 126.]
 [142. 137. 131.]
 [152. 142. 137.]
 [157. 152. 142.]
 [159. 157. 152.]
 [161. 159. 157.]
 [167. 161. 159.]
 [170. 167. 161.]]

然后在arr1的第一个'column'中为正值和下一个0值创建掩码：

# Keep row i when its current value (column 0) is non-zero and the current
# value of row i + 1 is 0; the appended False means the last row is never kept.
m =  np.append(arr1[1:, 0] == 0, False) & (arr1[:, 0] != 0)
# Boolean-mask indexing returns new arrays, so the assignments below do not
# write into the strided views.
arr1 = arr1[m] 
arr2 = arr2[m] 

# Reverse the column order of the first selected row only (in the printed
# output this is the row that still contains NaN padding).
arr1[:1] = arr1[:1, ::-1]
arr2[:1] = arr2[:1, ::-1]


# Create the DataFrames; columns become feature_0..feature_{n-1} and
# offset_0..offset_{n-1}.
df1 = pd.DataFrame(arr1).add_prefix('feature_')
df2 = pd.DataFrame(arr2).add_prefix('offset_')
print (df1)
   feature_0  feature_1  feature_2
0        NaN     2000.0     2000.0
1     2200.0        0.0        0.0
2     2200.0     2200.0     2200.0
3      200.0      200.0      200.0

print (df2)
   offset_0  offset_1  offset_2
0       NaN       0.0       7.0
1     103.0      91.0      88.0
2     137.0     131.0     126.0
3     161.0     159.0     157.0

# Column-interleaving idiom taken from https://stackoverflow.com/a/45122187/2901002
# Build the order offset_0, feature_0, offset_1, feature_1, ...
a = np.arange(n).astype(str)
cols = [item for x in a for item in ('offset_' + x, 'feature_' + x)]
# Join both frames side by side and reorder the columns.
# NOTE(review): this rebinds `df`, which previously held the input data.
df = pd.concat([df1, df2], axis=1)[cols]
print (df)
   offset_0  feature_0  offset_1  feature_1  offset_2  feature_2
0       NaN        NaN       0.0     2000.0       7.0     2000.0
1     103.0     2200.0      91.0        0.0      88.0        0.0
2     137.0     2200.0     131.0     2200.0     126.0     2200.0
3     161.0      200.0     159.0      200.0     157.0      200.0

编辑：

def rolling_window(a, window):
    """Build a sliding-window view over the last axis of ``a``.

    The result has the shape of ``a`` with its last axis replaced by two
    axes: the number of window positions (``a.shape[-1] - window + 1``) and
    ``window`` itself. The view is created with ``as_strided``, so the
    windows overlap in memory.

    Args:
        a: NumPy array to slide over.
        window: Number of consecutive elements in each window.

    Returns:
        A strided view of ``a`` whose trailing axis holds one window per row.
    """
    last_axis_len = a.shape[-1]
    # One row per window start position along the last axis.
    view_shape = a.shape[:-1] + (last_axis_len - window + 1, window)
    # Stepping to the next window and to the next element inside a window
    # both advance by one element, so the last stride is repeated.
    last_axis_step = a.strides[-1]
    view_strides = a.strides + (last_axis_step,)
    return np.lib.stride_tricks.as_strided(
        a, shape=view_shape, strides=view_strides
    )

# Window length shared by the backward-looking and forward-looking windows.
n = 3
# Backward-looking windows of 'feature': left-pad with NaN and reverse each
# window so that column 0 is the current value.
x = np.concatenate([[np.nan] * (n - 1), df['feature'].values])
arr1 = rolling_window(x, n)[:, ::-1]
#print (arr1)

# Same backward-looking windows for 'offset'.
x = np.concatenate([[np.nan] * (n - 1), df['offset'].values])
arr2 = rolling_window(x, n)[:, ::-1]
#print (arr2)

# Forward-looking windows: take the current values (column 0), right-pad with
# NaN and do not reverse, so each row is the current value followed by the
# next n - 1 values.
x1 = np.concatenate([arr1[:, 0], [np.nan] * (n - 1)])
arr11 = rolling_window(x1, n)
#print (arr11)

x2 = np.concatenate([arr2[:, 0], [np.nan] * (n - 1)])
arr22 = rolling_window(x2, n)
#print (arr22)

# Keep row i when its current value is non-zero and the current value of
# row i - 1 is 0; the prepended False means the first row is never kept.
m1 =  np.append(False, arr11[:-1, 0] == 0) & (arr11[:, 0] != 0)

arr11 = arr11[m1] 
arr22 = arr22[m1] 

# Shift the default 0..n-1 column labels by 4 before prefixing, giving
# feature_4.. and offset_4.. column names.
df11 = pd.DataFrame(arr11).rename(columns = lambda x: x + 4).add_prefix('feature_')
df22 = pd.DataFrame(arr22).rename(columns = lambda x: x + 4).add_prefix('offset_')
print (df11)
   feature_4  feature_5  feature_6
0     2200.0        0.0     2200.0
1     2200.0     2200.0     2200.0
2      200.0      200.0      200.0
3      200.0        NaN        NaN

print (df22)
   offset_4  offset_5  offset_6
0     103.0     109.0     113.0
1     113.0     126.0     131.0
2     157.0     159.0     161.0
3     170.0       NaN       NaN

# Column suffixes '4' .. str(n + 3), matching the renamed columns of df11/df22.
a = np.arange(4, n + 4).astype(str)
# Interleave the names as offset_4, feature_4, offset_5, feature_5, ...
cols = [item for x in a for item in ('offset_' + x, 'feature_' + x)]
# NOTE(review): the name `df1` is assigned again further down in this answer,
# so this forward-looking result is lost unless it is stored under another name.
df1 = pd.concat([df11, df22], axis=1)[cols]
print (df1)
   offset_4  feature_4  offset_5  feature_5  offset_6  feature_6
0     103.0     2200.0     109.0        0.0     113.0     2200.0
1     113.0     2200.0     126.0     2200.0     131.0     2200.0
2     157.0      200.0     159.0      200.0     161.0      200.0
3     170.0      200.0       NaN        NaN       NaN        NaN

# Keep row i when its current value (column 0) is non-zero and the current
# value of row i + 1 is 0; the appended False means the last row is never kept.
m =  np.append(arr1[1:, 0] == 0, False) & (arr1[:, 0] != 0)
# Boolean-mask indexing returns new arrays, so the assignments below do not
# write into the strided views.
arr1 = arr1[m] 
arr2 = arr2[m] 

# Reverse the column order of the first selected row only (in the printed
# output this is the row that still contains NaN padding).
arr1[:1] = arr1[:1, ::-1]
arr2[:1] = arr2[:1, ::-1]

# Columns become feature_0..feature_{n-1} and offset_0..offset_{n-1}.
df1 = pd.DataFrame(arr1).add_prefix('feature_')
df2 = pd.DataFrame(arr2).add_prefix('offset_')
print (df1)
   feature_0  feature_1  feature_2
0        NaN     2000.0     2000.0
1     2200.0        0.0        0.0
2     2200.0     2200.0     2200.0
3      200.0      200.0      200.0

print (df2)
   offset_0  offset_1  offset_2
0       NaN       0.0       7.0
1     103.0      91.0      88.0
2     137.0     131.0     126.0
3     161.0     159.0     157.0

# Column suffixes '0' .. str(n - 1).
a = np.arange(n).astype(str)
# Interleave the names as offset_0, feature_0, offset_1, feature_1, ...
cols = [item for x in a for item in ('offset_' + x, 'feature_' + x)]
# Join both frames side by side and reorder the columns.
# NOTE(review): this rebinds `df`, which previously held the input data.
df = pd.concat([df1, df2], axis=1)[cols]
print (df)
   offset_0  feature_0  offset_1  feature_1  offset_2  feature_2
0       NaN        NaN       0.0     2000.0       7.0     2000.0
1     103.0     2200.0      91.0        0.0      88.0        0.0
2     137.0     2200.0     131.0     2200.0     126.0     2200.0
3     161.0      200.0     159.0      200.0     157.0      200.0

相关问题更多 >

编程相关推荐

热门问题

热门文章