如何将带条件的切片中Numpy的累积和应用于上一个值?

2024-03-29 05:32:51 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个向量,其信号值为1或-1。我想要第二个向量,它计算具有相同值的连续信号的累积和,并在每次信号改变时重新开始累积。以下是一个例子:

signal  = [1  1  1 -1 -1 -1 -1]

cum_sum = [1  2  3 -1 -2 -3 -4]

我需要计算大量数据,希望尽可能高效地进行计算。 我的代码现在可以完成这项工作,但它需要时间,而且没有利用numpy的效率:

import numpy as np

# Input: a vector whose entries are +1 or -1
signal = np.array([1, 1, 1, -1, -1, -1, -1], dtype=int)

# The same vector delayed by one sample; the first sample has no
# predecessor, so a 0 is padded in front
signal_prev = np.pad(signal[:-1], (1, 0), mode='constant', constant_values=0)

# Two columns per row: current value, previous value
arr = np.array([signal, signal_prev], dtype=int).T

print(arr)
""" Array with signal values and previous values
[[ 1  0]
 [ 1  1]
 [ 1  1]
 [-1  1]
 [-1 -1]
 [-1 -1]
 [-1 -1]]
"""

# Result vector, grown by one element per sample
signal_sum = np.array([], dtype=int)

# Walk the rows in order: keep accumulating while the sign is unchanged,
# restart from the current value otherwise
for current, previous in arr:
    if np.sign(current * previous) > 0:
        next_value = signal_sum[-1] + previous
    else:
        next_value = current
    signal_sum = np.append(signal_sum, next_value)

arr_sum = np.array([signal, signal_sum]).T
print(arr_sum)
""" Array with signal values and cumulative sum restarted with signal change
[[ 1  1]
 [ 1  2]
 [ 1  3]
 [-1 -1]
 [-1 -2]
 [-1 -3]
 [-1 -4]]
"""

我相信使用numpy函数或lambda函数可以更有效地进行此计算。我不是程序员,而且我是Python新手。我想知道这是否可以做得更快


Tags: in numpy signal 信号 with np array int
3条回答

要获得快速、完全矢量化(无循环)的实现,可以使用常规的np.cumsum(),但要作用在数组的一个副本上:在该副本中,每个组的第一个元素先减去上一个组的和:

def group_cumsum(s):
    """Cumulative sum that restarts at every change of value.

    Consecutive equal entries of ``s`` form a group; each output entry is the
    running sum of its own group only.

    Parameters
    ----------
    s : array_like
        One-dimensional sequence of numbers (a list is accepted). It is not
        modified.

    Returns
    -------
    numpy.ndarray
        Array of the same length as ``s``, in the dtype ``np.cumsum`` uses
        for ``s``.
    """
    s = np.asarray(s)
    totals = np.cumsum(s)
    # Work on a copy in the accumulator dtype: the offsets subtracted below
    # are whole-group sums and would silently wrap in a narrow input dtype
    # such as int8.
    work = s.astype(totals.dtype)
    idx = np.nonzero(np.diff(s))[0]  # last of each group
    # Sum of each finished group = difference of the running total at
    # consecutive group ends.
    off = np.diff(np.concatenate(([0], totals[idx])))
    # Subtracting a group's sum at the first element of the next group makes
    # the final cumsum start over there.
    work[idx + 1] -= off
    return np.cumsum(work)

例如:

print(group_cumsum([1, 1, 1, -1, -1, -1, -1]))
# [ 1  2  3 -1 -2 -3 -4]

print(group_cumsum([1]*3 + [-1]*2 + [1]*4 + [-1]*5))
# [ 1  2  3 -1 -2  1  2  3  4 -1 -2 -3 -4 -5]

对于大型数组,节省的时间非常可观,因为:

  1. Python代码中没有循环,所有操作都是矢量化的,并且
  2. 对于长度为n、包含k个组的数组,复杂度为O(n + k)(而其他一些解决方案为O(n * k))

试试这个:

s = np.random.choice([1, -1], size=(int(1e6)))

%%timeit
group_cumsum(s)

19.1 ms ± 137 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

不知怎的,在我开始之前,这在我的脑海里看起来更容易。幸运的是,与此同时,其他人已经提出了解决方案。因此,是时候将我的解决方案与其他解决方案进行比较了:

import numpy as np
from timeit import timeit

# original
def f1(signal):
    """Reference implementation from the question (row-by-row loop).

    Returns a two-column array: column 0 is ``signal``, column 1 is the
    cumulative sum that restarts whenever the sign of the signal changes.
    """
    # Signal delayed by one sample, with a 0 in front for the first sample
    shifted = np.pad(signal[:-1], (1, 0), mode='constant', constant_values=0)
    pairs = np.array([signal, shifted], dtype=int).T

    running = np.array([], dtype=int)
    for current, previous in pairs:
        same_sign = np.sign(current * previous) > 0
        # Continue the run while the sign is unchanged, otherwise restart
        step = running[-1] + previous if same_sign else current
        running = np.append(running, step)

    return np.array([signal, running]).T


#Pierre D
def f2(s):
    """Cumulative sum that restarts at every change of value (Pierre D).

    Consecutive equal entries of ``s`` form a group; each output entry is the
    running sum of its own group only. ``s`` may be a list or an array and is
    not modified. The result has the dtype ``np.cumsum`` uses for ``s``.
    """
    s = np.asarray(s)
    totals = np.cumsum(s)
    # Work on a copy in the accumulator dtype: the offsets subtracted below
    # are whole-group sums and would silently wrap in a narrow input dtype
    # such as int8.
    work = s.astype(totals.dtype)
    idx = np.nonzero(np.diff(s))[0]  # last of each group
    # Sum of each finished group = difference of the running total at
    # consecutive group ends.
    off = np.diff(np.concatenate(([0], totals[idx])))
    # Subtracting a group's sum at the first element of the next group makes
    # the final cumsum start over there.
    work[idx + 1] -= off
    return np.cumsum(work)


#sai
def f3(signal):
    """Group-wise cumulative sum of ``signal`` (sai's approach).

    Computes the plain cumulative sum and subtracts, for every element, the
    running total that had been reached at the end of the preceding group
    (groups are runs of equal sign).

    NOTE: the result is wrong whenever the running total at a group boundary
    is exactly 0. Boundaries are located below with ``reset_sums != 0``, so a
    boundary whose stored total is 0 looks like "no boundary" and the
    previous offset is carried on into the new group.
    """
    normal_cumsum = np.cumsum(signal)

    # Keep the running total at the last element of each group (where the
    # sign differs from the next element) and 0 elsewhere, then shift the
    # array one place to the right so each total sits on the first element of
    # the following group. The appended sign makes the last entry 0, so the
    # value np.roll wraps around to index 0 is 0.
    reset_sums = np.roll(np.where(np.diff(np.sign(signal), append=np.sign(signal[-1])) != 0, normal_cumsum, np.zeros_like(signal)), 1)
    # Indices holding a non-zero offset, terminated by len(signal); each
    # consecutive pair delimits one forward-fill segment.
    ffill_idxs = np.hstack((np.squeeze(np.argwhere(reset_sums != 0)), np.array([len(signal)])))
    
    # Forward-fill: every element of a segment gets the offset stored at the
    # start of that segment.
    for start, end in zip(ffill_idxs[:-1], ffill_idxs[1:]):
        reset_sums[start:end] = reset_sums[start]

    return normal_cumsum - reset_sums


#Tis Chris sawtooth 0 
def f4(arr):
    """Cumulative sum that restarts at every change of value (sawtooth0).

    Labels each run of equal consecutive values with a group number and
    fills the result one group at a time, so the cost grows with
    (array length) x (number of groups).

    ``arr`` is a one-dimensional array (or list) and is not modified. The
    result has the dtype ``cumsum`` uses for ``arr``.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        # No groups at all; group_ix[-1] below would raise IndexError.
        return arr.cumsum()

    # Group numbers live in their own integer array: storing them in the
    # dtype of arr wraps for narrow ints and breaks range() for floats.
    group_ix = np.zeros(arr.shape, dtype=np.intp)
    group_ix[1:] = (arr[:-1] != arr[1:]).cumsum()
    # group_ix increments at each value change.

    # The cumsum of an empty slice is a cheap way to get the accumulator
    # dtype, so long runs of a narrow dtype cannot overflow the result.
    result = np.zeros(arr.shape, dtype=arr[:0].cumsum().dtype)
    for i in range(group_ix[-1] + 1):
        # Select the items of group i and write their cumulative sums.
        in_group = group_ix == i
        result[in_group] = arr[in_group].cumsum()
    return result
    

#c'est moi
def f5(signal):
    """Group-wise cumulative sum for a signal of +1 / -1 values.

    At the first element of every run the length of the preceding run is
    added with the sign of the new run, which cancels the preceding run's
    sum; a single cumulative sum over the adjusted copy then restarts at
    each run.
    """
    # Positions where the value differs from its predecessor (0 is assumed
    # in front of the first element), kept as the tuple np.nonzero returns
    starts = np.nonzero(np.diff(signal, prepend=0))
    # Distance from each run start to the previous one = previous run length
    previous_run_length = np.diff(starts, prepend=0)
    correction = previous_run_length * np.sign(signal[starts])

    adjusted = signal.copy()
    adjusted[starts] = adjusted[starts] + correction
    return adjusted.cumsum()

首先,让我们检查它们是否都返回相同的结果。事实证明,sai的解决方案有时会生成错误的数组:

nxd = 20    #array length    
s = np.random.choice([1, -1], size=nxd)

#Integrity check
print(f1(s).T[1])
#[-1 -2  1 -1  1  2 -1 -2 -3  1  2  3 -1 -2  1 -1  1 -1  1 -1]
print(f2(s))
#[-1 -2  1 -1  1  2 -1 -2 -3  1  2  3 -1 -2  1 -1  1 -1  1 -1]
print(f3(s)) 
#sometimes the counting is incorrect
#[-1 -2  1 -1  1  2  1  ->0<- -1  1  2  3  2  1  1 -1  1 -1  1 -1]
print(f4(s))
#[-1 -2  1 -1  1  2 -1 -2 -3  1  2  3 -1 -2  1 -1  1 -1  1 -1]
print(f5(s))
#[-1 -2  1 -1  1  2 -1 -2 -3  1  2  3 -1 -2  1 -1  1 -1  1 -1]

现在,较短阵列的计时:

ntime = 50 #number of test runs
nxd = 2000    #array length

s = np.random.choice([1, -1], size=nxd)

print(timeit(lambda: f1(s), number=ntime))
#1.1028546000000001

print(timeit(lambda: f2(s), number=ntime))
#0.004060500000000022   <--- best timing

print(timeit(lambda: f3(s), number=ntime))
#0.0505989

print(timeit(lambda: f4(s), number=ntime))
#0.4808455999999999

print(timeit(lambda: f5(s), number=ntime))
#0.0046319999999999695

和更长的阵列:

ntime = 5 #number of test runs
nxd = int(1e6)    #array length

s = np.random.choice([1, -1], size=nxd)

#print(timeit(lambda: f1(s), number=ntime))
#It took too long to wait for this result

print(timeit(lambda: f2(s), number=ntime))
#0.22104029999999986  <--- again the best timing

print(timeit(lambda: f3(s), number=ntime))
#2.402051

#print(timeit(lambda: f4(s), number=ntime))
#took too long

print(timeit(lambda: f5(s), number=ntime))
#0.2280369000000002

因此,Pierre D的建议是您的最佳选择(到目前为止)

2种可能的方法。sawtooth1使用与@sai相同的思想:

import numpy as np

arr = np.array([ 1,1,1,1,1,-1,-1,-1,-1,1,1,1,1,-1,-1,-1])
arr
# Out[3]: array([ 1,  1,  1,  1,  1, -1, -1, -1, -1,  1,  1,  1,  1, -1, -1, -1])


def sawtooth0( arr ):
    """Cumulative sum that restarts at every change of value.

    Labels each run of equal consecutive values with a group number and
    fills the result one group at a time, so the cost grows with
    (array length) x (number of groups).

    ``arr`` is a one-dimensional array (or list) and is not modified. The
    result has the dtype ``cumsum`` uses for ``arr``.
    """
    arr = np.asarray( arr )
    if arr.size == 0:
        # No groups at all; group_ix[ -1 ] below would raise IndexError.
        return arr.cumsum()

    # Group numbers live in their own integer array: storing them in the
    # dtype of arr wraps for narrow ints and breaks range() for floats.
    group_ix = np.zeros( arr.shape, dtype = np.intp )
    group_ix[ 1: ] = ( arr[ :-1 ] != arr[ 1: ] ).cumsum()
    # group_ix increments at each value change.

    # The cumsum of an empty slice is a cheap way to get the accumulator
    # dtype, so long runs of a narrow dtype cannot overflow the result.
    result = np.zeros( arr.shape, dtype = arr[ :0 ].cumsum().dtype )
    for i in range( group_ix[ -1 ] + 1 ):
        # Select the items of group i and write their cumulative sums.
        in_group = group_ix == i
        result[ in_group ] = arr[ in_group ].cumsum()
    return result

sawtooth0( arr )
# Out[21]: array([ 1,  2,  3,  4,  5, -1, -2, -3, -4,  1,  2,  3,  4, -1, -2, -3])

def sawtooth1( arr ):
    """Cumulative sum that restarts at every change of value.

    Takes the plain cumulative sum and subtracts from every element the
    total of all groups that ended before it.

    ``arr`` is a one-dimensional array (or list) and is not modified. The
    result has the dtype ``cumsum`` uses for ``arr``.
    """
    arr = np.asarray( arr )
    running = arr.cumsum()
    if arr.size == 0:
        # No groups at all; group_ix[ -1 ] below would raise IndexError.
        return running

    # Group numbers live in their own integer array: storing them in the
    # dtype of arr wraps for narrow ints and breaks range() for floats.
    group_ix = np.zeros( arr.shape, dtype = np.intp )
    group_ix[ 1: ] = ( arr[ :-1 ] != arr[ 1: ] ).cumsum()
    # group_ix increments at each value change.
    n_groups = group_ix[ -1 ] + 1

    # The np.int alias no longer exists in current NumPy; use the dtype of
    # the running sum so the two arrays subtract without any casting.
    totals = np.zeros( n_groups, dtype = running.dtype )
    for i in range( n_groups - 1 ):
        # Sum of group i, stored one slot later: it only affects later groups.
        totals[ i+1 ] = arr[ group_ix == i ].sum()

    totals = totals.cumsum()
    # Subtract the cumulative total of all previous groups from each element.
    return running - totals[ group_ix ]

sawtooth1( arr )
# Out[22]: array([ 1,  2,  3,  4,  5, -1, -2, -3, -4,  1,  2,  3,  4, -1, -2, -3])

两者的时间差是微不足道的

相关问题 更多 >