优化与Numpy向量化：重复一组数组

from time import time

import numpy as np
from pandas import DataFrame

# Accumulates one entry per row of group_by_strat_full (filled by run_strat_mod).
list_away = []

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the replacement.
# The transpose turns the (1000, 5) random frame into a (5, 1000) array.
new_data_morphed = DataFrame(np.random.rand(1000, 5)).transpose().to_numpy()

# Each row holds three row indices into new_data_morphed.
group_by_strat_full = DataFrame(
    [[0, 1, 4], [1, 2, 3], [2, 3, 4], [0, 1, 4]],
    columns=['add_mask_1', 'add_mask_2', 'add_mask_3'],
)

# Reductions applied to every column of the selected slice.
all_the_core_strats = [lambda x: x.prod(), lambda x: sum(x)]


def run_strat_mod(df_vals, core_strat, dict_mask_1, dict_mask_2, dict_mask_3, list_away):
    """Apply every strategy to the three selected rows and record the results.

    Args:
        df_vals: Array indexed along its first axis by the three masks.
        core_strat: Iterable of callables, each reducing a 1-D slice.
        dict_mask_1: First index into ``df_vals``.
        dict_mask_2: Second index into ``df_vals``.
        dict_mask_3: Third index into ``df_vals``.
        list_away: List that receives one list of per-strategy results.

    Returns:
        None. The results are appended to ``list_away`` in place.
    """
    slice_df = df_vals[[dict_mask_1, dict_mask_2, dict_mask_3]]
    # TODO: this comprehension list should be vectorized
    to_append = [np.apply_along_axis(u, 0, slice_df) for u in core_strat]
    list_away.append(to_append)


t1 = time()
results_3 = group_by_strat_full.apply(
    lambda x: run_strat_mod(
        new_data_morphed,
        all_the_core_strats,
        x['add_mask_1'],
        x['add_mask_2'],
        x['add_mask_3'],
        list_away,
    ),
    axis=1,
)
t2 = time()
print(abs(t1 - t2))

print(slice_df)
[[ 0.91302268 0.6172959 0.05478723 ..., 0.37028638 0.52116891 0.14158221]
 [ 0.72579223 0.78732047 0.61335979 ..., 0.46359203 0.27593171 0.73700975]
 [ 0.21706977 0.87639447 0.44936619 ..., 0.44319643 0.53712003 0.8071096 ]]

slice_df = np.array([df_vals[[dict_mask_1, dict_mask_2, dict_mask_3]]] * len(core_strat))
print(slice_df)
[[[ 0.91302268 0.6172959 0.05478723 ..., 0.37028638 0.52116891 0.14158221]
  [ 0.72579223 0.78732047 0.61335979 ..., 0.46359203 0.27593171 0.73700975]
  [ 0.21706977 0.87639447 0.44936619 ..., 0.44319643 0.53712003 0.8071096 ]]

 [[ 0.91302268 0.6172959 0.05478723 ..., 0.37028638 0.52116891 0.14158221]
  [ 0.72579223 0.78732047 0.61335979 ..., 0.46359203 0.27593171 0.73700975]
  [ 0.21706977 0.87639447 0.44936619 ..., 0.44319643 0.53712003 0.8071096 ]]]

1条回答

网友

1楼 · 发布于 2024-05-14 15:14:37

def foo(x):
    """Print ``x``, then return ``[x[0].prod(), x[1].sum()]`` as a list."""
    # Diagnostic output: shows exactly which slice the caller passes in.
    print(x)
    first, second = x[0], x[1]
    return [first.prod(), second.sum()]

将其应用于一个三维数组；为简单起见，我使用下面这个小数组：

In [64]: x=np.arange(2*3*2).reshape(2,3,2)

In [66]: np.apply_along_axis(foo,0,x)
[0 6]
[1 7]
[2 8]
[3 9]
[ 4 10]
[ 5 11]
Out[66]: 
array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5]],

       [[ 6,  7],
        [ 8,  9],
        [10, 11]]])

可以看到，apply_along_axis 依次把 x[:,0,0]、x[:,0,1]、x[:,1,0] 等一维切片传给 foo。每个切片只有两个数，对它们分别取 prod 和 sum 并没有什么意义。

apply_along_axis 只是对下面这种显式循环的一种便捷封装：

# Visit every (row, col) position of the two trailing axes and hand foo the
# 1-D slice taken along axis 0; foo's result is stored at the same position.
for row in range(x.shape[1]):
    for col in range(x.shape[2]):
        fiber = x[:, row, col]
        ret[:, row, col] = foo(fiber)

相关问题更多 >

编程相关推荐

热门问题

热门文章