包含多个np.multiply语句的代码段优化

# Combine the x, y and h arrays with the coefficients in rows 0-2 of K_Rinv.
# Each output is built up in place: the first product allocates the result,
# the following two products are added into it.
# NOTE(review): x, y, h and K_Rinv are defined outside this snippet.

# Row 0 of K_Rinv -> xh
xh = np.multiply(K_Rinv[0, 0], x)
xh += np.multiply(K_Rinv[0, 1], y)
xh += np.multiply(K_Rinv[0, 2], h)

# Row 1 of K_Rinv -> yh
yh = np.multiply(K_Rinv[1, 0], x)
yh += np.multiply(K_Rinv[1, 1], y)
yh += np.multiply(K_Rinv[1, 2], h)

# Row 2 of K_Rinv -> q
q = np.multiply(K_Rinv[2, 0], x)
q += np.multiply(K_Rinv[2, 1], y)
q += np.multiply(K_Rinv[2, 2], h)

# Cross-check the numexpr formulation against the plain NumPy one.
# NOTE(review): x, y, h and K_Rinv are defined outside this snippet.

# Variant 1: one fused numexpr expression per output.
xh_1 = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[0, 0], 'b1': x,
                                              'a2': K_Rinv[0, 1], 'b2': y,
                                              'a3': K_Rinv[0, 2], 'b3': h})
yh_1 = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[1, 0], 'b1': x,
                                              'a2': K_Rinv[1, 1], 'b2': y,
                                              'a3': K_Rinv[1, 2], 'b3': h})
q_1 = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[2, 0], 'b1': x,
                                             'a2': K_Rinv[2, 1], 'b2': y,
                                             'a3': K_Rinv[2, 2], 'b3': h})

# Variant 2: the original multiply-and-accumulate NumPy code.
xh_2 = np.multiply(K_Rinv[0, 0], x)
xh_2 += np.multiply(K_Rinv[0, 1], y)
xh_2 += np.multiply(K_Rinv[0, 2], h)

yh_2 = np.multiply(K_Rinv[1, 0], x)
yh_2 += np.multiply(K_Rinv[1, 1], y)
yh_2 += np.multiply(K_Rinv[1, 2], h)

q_2 = np.multiply(K_Rinv[2, 0], x)
q_2 += np.multiply(K_Rinv[2, 1], y)
q_2 += np.multiply(K_Rinv[2, 2], h)

# Compare the arrays element-wise. The previous `a.all() == b.all()` form only
# compared two booleans ("is every element non-zero?") and therefore reported
# True for any pair of all-non-zero arrays, whatever their values.
# A tolerance-based comparison is used because the two variants may round
# intermediate results differently.
check1 = np.allclose(xh_1, xh_2)
check2 = np.allclose(yh_1, yh_2)
check3 = np.allclose(q_1, q_2)
print(" Check1 :{} , Check2: {} , Check3:{}".format(check1, check2, check3))

File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 420, in _compile_for_args raise e File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 353, in _compile_for_args return self.compile(tuple(argtypes)) File "/usr/local/lib/python3.6/dist-packages/numba/compiler_lock.py", line 32, in _acquire_compile_lock return func(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 768, in compile cres = self._compiler.compile(args, return_type) File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 77, in compile status, retval = self._compile_cached(args, return_type) File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 91, in _compile_cached retval = self._compile_core(args, return_type) File "/usr/local/lib/python3.6/dist-packages/numba/dispatcher.py", line 109, in _compile_core pipeline_class=self.pipeline_class) File "/usr/local/lib/python3.6/dist-packages/numba/compiler.py", line 551, in compile_extra return pipeline.compile_extra(func) File "/usr/local/lib/python3.6/dist-packages/numba/compiler.py", line 327, in compile_extra raise e File "/usr/local/lib/python3.6/dist-packages/numba/compiler.py", line 321, in compile_extra ExtractByteCode().run_pass(self.state) File "/usr/local/lib/python3.6/dist-packages/numba/untyped_passes.py", line 67, in run_pass bc = bytecode.ByteCode(func_id) File "/usr/local/lib/python3.6/dist-packages/numba/bytecode.py", line 215, in __init__ self._compute_lineno(table, code) File "/usr/local/lib/python3.6/dist-packages/numba/bytecode.py", line 237, in _compute_lineno known = table[_FIXED_OFFSET].lineno KeyError: 2

from datetime import datetime

import numba as nb
import numexpr
import numpy as np


def calc(x, y, h, K_Rinv):
    """Reference implementation using NumPy multiply-and-accumulate.

    Returns the tuple (xh, yh, q), where each output is the combination of
    x, y and h weighted by row 0, 1 and 2 of K_Rinv respectively.
    """
    xh_2 = np.multiply(K_Rinv[0, 0], x)
    xh_2 += np.multiply(K_Rinv[0, 1], y)
    xh_2 += np.multiply(K_Rinv[0, 2], h)

    yh_2 = np.multiply(K_Rinv[1, 0], x)
    yh_2 += np.multiply(K_Rinv[1, 1], y)
    yh_2 += np.multiply(K_Rinv[1, 2], h)

    q_2 = np.multiply(K_Rinv[2, 0], x)
    q_2 += np.multiply(K_Rinv[2, 1], y)
    q_2 += np.multiply(K_Rinv[2, 2], h)

    return xh_2, yh_2, q_2


def calc_numexpr(x, y, h, K_Rinv):
    """Same computation as calc(), one fused numexpr expression per output."""
    xh = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[0, 0], 'b1': x,
                                                'a2': K_Rinv[0, 1], 'b2': y,
                                                'a3': K_Rinv[0, 2], 'b3': h})
    yh = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[1, 0], 'b1': x,
                                                'a2': K_Rinv[1, 1], 'b2': y,
                                                'a3': K_Rinv[1, 2], 'b3': h})
    q = numexpr.evaluate('a1*b1+a2*b2+a3*b3', {'a1': K_Rinv[2, 0], 'b1': x,
                                               'a2': K_Rinv[2, 1], 'b2': y,
                                               'a3': K_Rinv[2, 2], 'b3': h})
    return xh, yh, q


@nb.njit(fastmath=True, parallel=True)
def calc_nb(x, y, h, K_Rinv):
    """Same computation as calc(), as an explicit loop compiled by Numba.

    The outer loop runs over x.shape[0] with nb.prange; all three outputs are
    written in a single pass over the inputs.
    """
    xh = np.empty_like(x)
    yh = np.empty_like(x)
    q = np.empty_like(x)

    for i in nb.prange(x.shape[0]):
        for j in range(x.shape[1]):
            xh[i, j] = K_Rinv[0, 0] * x[i, j] + K_Rinv[0, 1] * y[i, j] + K_Rinv[0, 2] * h[i, j]
            yh[i, j] = K_Rinv[1, 0] * x[i, j] + K_Rinv[1, 1] * y[i, j] + K_Rinv[1, 2] * h[i, j]
            q[i, j] = K_Rinv[2, 0] * x[i, j] + K_Rinv[2, 1] * y[i, j] + K_Rinv[2, 2] * h[i, j]
    return xh, yh, q


x = np.random.random((4206, 5749))
y = np.random.random((4206, 5749))
h = np.random.random((4206, 5749))
K_Rinv = np.random.random((3, 3))

# Warm-up: trigger the JIT compilation on a one-row slice so the timing below
# measures execution only, not compilation. The slice is intended to have the
# same dtype / ndim / layout as the full arrays so the compiled code is reused.
calc_nb(x[:1], y[:1], h[:1], K_Rinv)

start = datetime.now()
x_calc, y_calc, q_calc = calc(x, y, h, K_Rinv)
end = datetime.now()
print("Calc took: {} ".format(end - start))

start = datetime.now()
x_numexpr, y_numexpr, q_numexpr = calc_numexpr(x, y, h, K_Rinv)
end = datetime.now()
print("Calc_numexpr took: {} ".format(end - start))

start = datetime.now()
x_nb, y_nb, q_nb = calc_nb(x, y, h, K_Rinv)
end = datetime.now()
print("Calc nb took: {} ".format(end - start))

# Compare with a tolerance instead of exact `==`: fastmath and numexpr are
# free to evaluate the sum in a different order, so results can legitimately
# differ in the last bits and an exact comparison would report a false failure.
check_nb_q = np.allclose(q_calc, q_nb)
check_nb_y = np.allclose(y_calc, y_nb)
check_nb_x = np.allclose(x_calc, x_nb)

check_numexpr_q = np.allclose(q_calc, q_numexpr)
check_numexpr_y = np.allclose(y_calc, y_numexpr)
check_numexpr_x = np.allclose(x_calc, x_numexpr)

print("Checks for numexpr: {} , {} ,{} \nChecks for nb: {} ,{}, {}"
      .format(check_numexpr_x, check_numexpr_y, check_numexpr_q,
              check_nb_x, check_nb_y, check_nb_q))

3条回答

网友

1楼 · 编辑于 2024-05-13 23:34:44

另一种可能是使用Numba

示例

import numpy as np
import numba as nb

@nb.njit(fastmath=True,parallel=True)
def calc_nb(x,y,h,K_Rinv):
    """Combine x, y and h with the coefficients of K_Rinv in a single pass.

    Three new arrays shaped like x are allocated and returned as
    (xh, yh, q); output k uses the coefficients K_Rinv[k, 0..2].
    The loop over the first axis is a Numba prange loop.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    out_x = np.empty_like(x)
    out_y = np.empty_like(x)
    out_q = np.empty_like(x)

    for row in nb.prange(n_rows):
        for col in range(n_cols):
            # Read each input element once and reuse it for all three outputs.
            xv = x[row, col]
            yv = y[row, col]
            hv = h[row, col]
            out_x[row, col] = K_Rinv[0, 0] * xv + K_Rinv[0, 1] * yv + K_Rinv[0, 2] * hv
            out_y[row, col] = K_Rinv[1, 0] * xv + K_Rinv[1, 1] * yv + K_Rinv[1, 2] * hv
            out_q[row, col] = K_Rinv[2, 0] * xv + K_Rinv[2, 1] * yv + K_Rinv[2, 2] * hv

    return out_x, out_y, out_q

此计算是否受内存带宽限制？

def copy(x,y,h,K_Rinv):
    """Return independent copies of x, y and h as a 3-tuple.

    K_Rinv is accepted only so the signature matches the calc_* functions;
    it is not used.
    """
    return tuple(np.copy(source) for source in (x, y, h))

# Baseline measurement (IPython %timeit magic): time spent just copying the
# three input arrays; the line below is the recorded output.
%timeit copy(x,y,h,K_Rinv)
#147 ms ± 4.98 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

此计算受内存带宽和动态内存分配的限制，其中的乘法运算对性能几乎没有影响

计时

# Benchmark inputs: three random 4206 x 5749 arrays and a random 3 x 3 matrix.
x = np.random.random((4206, 5749))
y = np.random.random((4206, 5749))
h = np.random.random((4206, 5749))
K_Rinv = np.random.random((3, 3))

# Each call below is timed with the IPython %timeit magic; the "#..." line
# after a call is its recorded output.
# NOTE(review): calc_numexpr_scleronomic and calc_Daniel_F are not defined in
# this snippet - presumably the numexpr and einsum variants from the other
# answers; confirm before re-running.
%timeit calc(x,y,h,K_Rinv) #Your implementation
#581 ms ± 8.05 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit calc_nb(x,y,h,K_Rinv)
#145 ms ± 3.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit calc_numexpr_scleronomic(x,y,h,K_Rinv)
#175 ms ± 1.83 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
%timeit calc_Daniel_F(x,y,h,K_Rinv)
#589 ms ± 24.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

可能的进一步优化：重用已分配的内存

@nb.njit(fastmath=True,parallel=True)
def calc_nb_2(x,y,h,K_Rinv,xh,yh,q):
    """Fill the caller-supplied xh, yh and q arrays in a single pass.

    Output k is the combination of x, y and h weighted by K_Rinv[k, 0..2].
    Nothing is allocated here: results are written into xh, yh and q, and
    those same three arrays are returned as a tuple.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    for row in nb.prange(n_rows):
        for col in range(n_cols):
            # Read each input element once and reuse it for all three outputs.
            xv = x[row, col]
            yv = y[row, col]
            hv = h[row, col]
            xh[row, col] = K_Rinv[0, 0] * xv + K_Rinv[0, 1] * yv + K_Rinv[0, 2] * hv
            yh[row, col] = K_Rinv[1, 0] * xv + K_Rinv[1, 1] * yv + K_Rinv[1, 2] * hv
            q[row, col] = K_Rinv[2, 0] * xv + K_Rinv[2, 1] * yv + K_Rinv[2, 2] * hv

    return xh, yh, q

# Allocate the output buffers only once if you call this function repeatedly:
# calc_nb_2 writes its results into the arrays passed in, so the same three
# buffers can be handed to every call.
xh=np.empty_like(x)
yh=np.empty_like(x)
q=np.empty_like(x)

%timeit calc_nb_2(x,y,h,K_Rinv,xh,yh,q)
#69.2 ms ± 194 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

网友

2楼 · 编辑于 2024-05-13 23:34:44

您也可以使用numexpr来加速计算：

import numpy as np
import numexpr

# Benchmark inputs: three random planes of identical shape and a random
# 3 x 3 coefficient matrix (drawn in the order x, y, h, K_Rinv).
shape = (4206, 5749)
x, y, h = (np.random.random(shape) for _ in range(3))
K_Rinv = np.random.random((3, 3))

# The same fused expression is evaluated once per row of K_Rinv.
_EXPR = 'a1*b1+a2*b2+a3*b3'


def _operands(row):
    """Build the numexpr variable mapping for one row of K_Rinv."""
    return {
        'a1': K_Rinv[row, 0], 'b1': x,
        'a2': K_Rinv[row, 1], 'b2': y,
        'a3': K_Rinv[row, 2], 'b3': h,
    }


xh = numexpr.evaluate(_EXPR, _operands(0))
yh = numexpr.evaluate(_EXPR, _operands(1))
q = numexpr.evaluate(_EXPR, _operands(2))

在我的机器上，这比没有numexpr的速度快5倍左右

另外，既然处理的是矩阵，与其把乘法和加法逐项拆开，我更倾向于使用矩阵乘法和numpy广播：

# Join x, y and h along a new last axis, then append one more length-1 axis,
# so every (i, j) position holds a 3 x 1 column.
xyh_mat = np.concatenate([x[:, :, np.newaxis],
                          y[:, :, np.newaxis],
                          h[:, :, np.newaxis]], axis=-1)[:, :, :, np.newaxis]  
# (4206, 5749, 3, 1)
# Add two leading length-1 axes so K_Rinv lines up with the (i, j) axes above.
K_Rinv_mat = K_Rinv[np.newaxis, np.newaxis, :, :]  
# (1, 1, 3, 3)


# Two alternative ways of applying K_Rinv at every (i, j) position. They are
# alternatives, not steps: the second assignment overwrites the first result.
xyh_mat_2 = np.einsum("ijkl, ijlk->ijk", K_Rinv_mat, xyh_mat)
# 1.25x faster

xyh_mat_2 = K_Rinv_mat @ xyh_mat
# 3x slower
# NOTE(review): the matmul result presumably keeps the trailing length-1 axis,
# i.e. shape (4206, 5749, 3, 1), unlike the einsum result ("->ijk"); the
# slices below would then still carry that extra axis - confirm.

# xh = xyh_mat_2[:, :, 0]
# yh = xyh_mat_2[:, :, 1]
# q = xyh_mat_2[:, :, 2]

然而，在这种情况下使用numpy似乎没有速度优势，这让我有点吃惊

编辑

关于进一步计算的意见：

# The third positional argument of np.divide is the output array, so these
# two calls store xh / q and yh / q into the existing x and y arrays.
np.divide(xh, q, x)
np.divide(yh, q, y)
# should translate to:
# (here x and y are rebound to the arrays returned by evaluate())
x = numexpr.evaluate('a/b', {'a': xh , 'b': q })
y = numexpr.evaluate('a/b', {'a': yh , 'b': q })

网友

3楼 · 编辑于 2024-05-13 23:34:44

可以肯定，这只是一个展开写出的简单点积（dot product）：

# Stack the three inputs along a new leading axis, contract that axis with
# the columns of K_Rinv, then unpack the leading axis into the three results.
x_y_h = np.stack((x, y, h), axis=0)
xh_yh_q = np.einsum('ij, jkl -> ikl', K_Rinv, x_y_h)
xh, yh, q = xh_yh_q

编辑

相关问题更多 >

编程相关推荐

热门问题

热门文章