这个python函数可以矢量化吗?

2024-06-01 02:21:44 发布

您现在位置:Python中文网/ 问答频道 /正文

我一直在做这个函数,它为我正在开发的模拟代码生成一些我需要的参数,并在提高其性能方面遇到了困难。你知道吗

对代码进行分析表明,这是主要的瓶颈,因此我可以对其进行任何增强(无论多么微小)。你知道吗

我想尝试矢量化这个函数的一部分,但我不确定这是否可能。你知道吗

主要的挑战是存储在数组params中的参数取决于参数的索引。我看到的唯一简单的解决方案是使用np.ndenumerate,但这似乎相当缓慢。你知道吗

在数组中存储的值取决于存储位置的情况下,是否可以对这种类型的操作进行矢量化?或者,创建一个只提供数组索引元组的生成器会更聪明/更快吗?你知道吗

import numpy as np
from scipy.sparse import linalg as LA

def get_params(num_bonds, energies):
    """
    Returns the interaction parameters of different pairs of atoms.

    Parameters
    ----------
    num_bonds : ndarray, shape = (M, 20)
        Sparse array containing the number of nearest neighbor bonds for 
        different pairs of atoms (denoted by their column) and next-
        nearest neighbor bonds. Columns 0-9 contain nearest neighbors, 
        10-19 contain next-nearest neighbors

    energies : ndarray, shape = (M, )
        Energy vector corresponding to each atomic system stored in each 
        row of num_bonds.
    """

    # -- Compute the bond energies
    x = LA.lsqr(num_bonds, energies, show=False)[0]

    params = np.zeros([4, 4, 4, 4, 4, 4, 4, 4, 4])

    nn = {(0,0): x[0], (1,1): x[1], (2,2): x[2], (3,3): x[3], (0,1): x[4],
          (1,0): x[4], (0,2): x[5], (2,0): x[5], (0,3): x[6], (3,0): x[6],
          (1,2): x[7], (2,1): x[7], (1,3): x[8], (3,1): x[8], (2,3): x[9],
          (3,2): x[9]}

    nnn = {(0,0): x[10], (1,1): x[11], (2,2): x[12], (3,3): x[13], (0,1): x[14],
           (1,0): x[14], (0,2): x[15], (2,0): x[15], (0,3): x[16], (3,0): x[16],
           (1,2): x[17], (2,1): x[17], (1,3): x[18], (3,1): x[18], (2,3): x[19],
           (3,2): x[19]}

    """
    params contains the energy contribution of each site due to its
    local environment. The shape is given by the number of possible atom
    types and the number of sites in the lattice.
    """
    for (i,j,k,l,m,jj,kk,ll,mm), val in np.ndenumerate(params):

        params[i,j,k,l,m,jj,kk,ll,mm] = nn[(i,j)] + nn[(i,k)] + nn[(i,l)] + \
                                        nn[(i,m)] + nnn[(i,jj)] + \
                                        nnn[(i,kk)] + nnn[(i,ll)] + nnn[(i,mm)]

return np.ascontiguousarray(params)

Tags: ofthenumber参数npnn数组params
1条回答
网友
1楼 · 发布于 2024-06-01 02:21:44

这是一种使用^{}求和的矢量化方法-

# Gather the elements sorted by the keys in (row,col) order of a dense 
# 2D array for both nn and nnn
sidx0 = np.ravel_multi_index(np.array(nn.keys()).T,(4,4)).argsort()
a0 = np.array(nn.values())[sidx0].reshape(4,4)

sidx1 = np.ravel_multi_index(np.array(nnn.keys()).T,(4,4)).argsort()
a1 = np.array(nnn.values())[sidx1].reshape(4,4)

# Perform the summations keep the first axis aligned for nn and nnn parts
parte0 = a0[:,:,None,None,None] + a0[:,None,:,None,None] + \
     a0[:,None,None,:,None] + a0[:,None,None,None,:]

parte1 = a1[:,:,None,None,None] + a1[:,None,:,None,None] + \
     a1[:,None,None,:,None] + a1[:,None,None,None,:]    

# Finally add up sums from nn and nnn for final output    
out = parte0[...,None,None,None,None] + parte1[:,None,None,None,None]

运行时测试

函数定义-

def vectorized_approach(nn,nnn):
    sidx0 = np.ravel_multi_index(np.array(nn.keys()).T,(4,4)).argsort()
    a0 = np.array(nn.values())[sidx0].reshape(4,4)    
    sidx1 = np.ravel_multi_index(np.array(nnn.keys()).T,(4,4)).argsort()
    a1 = np.array(nnn.values())[sidx1].reshape(4,4)
    parte0 = a0[:,:,None,None,None] + a0[:,None,:,None,None] + \
         a0[:,None,None,:,None] + a0[:,None,None,None,:]    
    parte1 = a1[:,:,None,None,None] + a1[:,None,:,None,None] + \
         a1[:,None,None,:,None] + a1[:,None,None,None,:]
    return parte0[...,None,None,None,None] + parte1[:,None,None,None,None]

def original_approach(nn,nnn):
    params = np.zeros([4, 4, 4, 4, 4, 4, 4, 4, 4])
    for (i,j,k,l,m,jj,kk,ll,mm), val in np.ndenumerate(params):    
        params[i,j,k,l,m,jj,kk,ll,mm] = nn[(i,j)] + nn[(i,k)] + nn[(i,l)] + \
                                        nn[(i,m)] + nnn[(i,jj)] + \
                                        nnn[(i,kk)] + nnn[(i,ll)] + nnn[(i,mm)]
    return params

设置输入-

# Setup inputs
x = np.random.rand(30)
nn = {(0,0): x[0], (1,1): x[1], (2,2): x[2], (3,3): x[3], (0,1): x[4],
      (1,0): x[4], (0,2): x[5], (2,0): x[5], (0,3): x[6], (3,0): x[6],
      (1,2): x[7], (2,1): x[7], (1,3): x[8], (3,1): x[8], (2,3): x[9],
      (3,2): x[9]}

nnn = {(0,0): x[10], (1,1): x[11], (2,2): x[12], (3,3): x[13], (0,1): x[14],
       (1,0): x[14], (0,2): x[15], (2,0): x[15], (0,3): x[16], (3,0): x[16],
       (1,2): x[17], (2,1): x[17], (1,3): x[18], (3,1): x[18], (2,3): x[19],
       (3,2): x[19]}

计时-

In [98]: np.allclose(original_approach(nn,nnn),vectorized_approach(nn,nnn))
Out[98]: True

In [99]: %timeit original_approach(nn,nnn)
1 loops, best of 3: 884 ms per loop

In [100]: %timeit vectorized_approach(nn,nnn)
1000 loops, best of 3: 708 µs per loop

欢迎使用1000x+加速!你知道吗


对于这类外部产品的泛型数系统,这里有一个泛型解决方案,它迭代这些维度-

m,n = a0.shape # size of output array along each axis
N = 4  # Order of system
out = a0.copy()
for i in range(1,N):
    out = out[...,None] + a0.reshape((m,)+(1,)*i+(n,))

for i in range(N):
    out = out[...,None] + a1.reshape((m,)+(1,)*(i+n)+(n,))

相关问题 更多 >