计算每个分箱（bin）中数据的最大值

# Question script: compute the per-bin maximum of randomly scattered samples
# with a plain Python loop and report how long the loop takes.

import numpy as np
import time

shape = (20, 30, 40)

ndata = int(1e6)

# One random value per sample.
data = np.random.normal(loc=10, scale=5, size=ndata)

# One coordinate per axis, drawn uniformly from [0, shape[axis]); after the
# transpose there is one row per sample and one column per axis.
coords = np.vstack([np.random.uniform(0, shape[i], ndata)
                    for i in range(len(shape))]).T

# Starting from zeros means a bin never ends up below 0: empty bins and bins
# that only received negative samples both stay at 0.
max_data = np.zeros(shape)

# perf_counter() is the clock intended for measuring short intervals; unlike
# time.time() it is monotonic and unaffected by system clock adjustments.
start = time.perf_counter()

for i in range(len(data)):

    # shortcut to find bin indices when the bins are
    # [ range( shape[i] ) for i in range( len( shape ) ) ]
    bin_indices = tuple(coords[i].astype(int))

    max_data[bin_indices] = max(max_data[bin_indices], data[i])

elapsed = time.perf_counter() - start

print('elapsed: %.3e' % elapsed)  # 2.98 seconds on my computer

1条回答

网友

1楼 · 发布于 2024-05-19 02:29:00

使用 https://stackoverflow.com/a/55226663/7207392 中第二快的解决方案，我获得了 30 倍以上的加速。如果您愿意使用 pythran，还可以采用更快的解决方案。

import numpy as np
from scipy import sparse
import time

# Benchmark: per-bin maximum of randomly scattered samples, computed twice --
# first with a plain Python loop (the baseline), then with a vectorized
# scipy.sparse formulation -- and finally checked for identical results.

shape = (20, 30, 40)

ndata = int(1e6)

# One random value per sample.
data = np.random.normal(loc=10, scale=5, size=ndata)

# One coordinate per axis, drawn uniformly from [0, shape[axis]); after the
# transpose there is one row per sample and one column per axis.
coords = np.vstack([np.random.uniform(0, shape[i], ndata)
                    for i in range(len(shape))]).T

# --- Baseline: explicit Python loop ----------------------------------------

# Starting from zeros means a bin never ends up below 0: empty bins and bins
# that only received negative samples both stay at 0.
max_data = np.zeros(shape)

# perf_counter() is the clock intended for measuring short intervals; unlike
# time.time() it is monotonic and unaffected by system clock adjustments.
start = time.perf_counter()

# The per-sample loop is intentionally left as is: it is the reference
# implementation whose run time is being compared.
for i in range(len(data)):

    # shortcut to find bin indices when the bins are
    # [ range( shape[i] ) for i in range( len( shape ) ) ]
    bin_indices = tuple(coords[i].astype(int))

    max_data[bin_indices] = max(max_data[bin_indices], data[i])

elapsed = time.perf_counter() - start

print('elapsed: %.3e' % elapsed)  # 2.98 seconds on my computer


# --- Vectorized: group samples by bin via a sparse matrix -------------------

start = time.perf_counter()

# Collapse each sample's N-d bin index into one linear (flattened) bin number.
bin_indices = np.ravel_multi_index(coords.astype(int).T, shape)

# Build a (samples x bins) CSR matrix holding exactly one value per row, then
# convert it to CSC so the stored values become grouped by bin: afterwards
# aux.data[aux.indptr[b]:aux.indptr[b + 1]] contains the samples of bin b.
aux = sparse.csr_matrix((data, bin_indices, np.arange(data.size + 1)),
                        (data.size, np.prod(shape))).tocsc()

# indptr is non-decreasing and ends at data.size, so every bin from `cut`
# onwards starts at data.size and is empty.  Those trailing bins are left out
# of the reduceat call because an offset equal to len(aux.data) is out of
# bounds for it.
cut = aux.indptr.searchsorted(data.size)
max_data_pp = np.empty(shape)
# ravel() on this freshly allocated contiguous array yields a view, so the
# assignments below write straight into max_data_pp.
max_data_pp.ravel()[:cut] = np.maximum.reduceat(aux.data, aux.indptr[:cut])

# Floor value that reproduces the zero-initialised baseline array.
CLIPAT = 0

# Empty bins (equal consecutive indptr entries) hold either an unrelated
# element returned by reduceat or uninitialised memory from np.empty; reset
# them, then clip bins whose true maximum is below the floor.
max_data_pp.ravel()[aux.indptr[:-1] == aux.indptr[1:]] = CLIPAT
max_data_pp[max_data_pp < CLIPAT] = CLIPAT

elapsed = time.perf_counter() - start

# Far faster than the loop above (about 6e-02 s in the author's example run).
print('elapsed: %.3e' % elapsed)


# Both approaches must agree exactly; the maxima are picked, never recomputed,
# so exact floating-point equality is expected.
assert np.all(max_data == max_data_pp)

运行示例：

elapsed: 2.417e+00
elapsed: 6.387e-02

相关问题更多 >

编程相关推荐

热门问题

热门文章