对重叠的子矩阵求平均？

# Sketch of how the overlapping windows are produced.
# NOTE: z, n, s and mat are not defined in this snippet; they are placeholders
# for the stride, the row count, the window length and the source matrix.
stride = z
ranges = []  # will contain sub lists of start / end positions

for i in range(0, n, stride):
    if i + s > n:
        ranges.append([n-s, n])  # <-- if not evenly divisible include last ragged bit
        break
    else:
        ranges.append([i, i+s])

# k = len(ranges)
for a, b in ranges:
    submat = mat[a:b]  # <--- produces submats of shape (s, m)
    # not necessarily where submats come from, just for
    # simple example purpose, feel free to add random noise to each submat

# Concrete example of the window layout.
n = 693
m = 10  # so mat has shape (693, 10)
s = 500
stride = 50
ranges = [[0, 500], [50, 550], [100, 600], [150, 650], [193, 693]]
# notice that the range (0,50) doesn't need to be averaged
k = 5  # len(ranges)
# so we have k submats of shape (500, 10)


def count_overlap(max_len, ranges):
    """Count, for every index in ``range(max_len)``, how many ranges contain it.

    Args:
        max_len: Length of the returned tally (from the example: 693).
        ranges: Iterable of half-open ``(start, stop)`` pairs
            (from the example: ``[[0, 500], ...]``).

    Returns:
        A float array of length ``max_len`` whose entry ``i`` is the number of
        pairs with ``start <= i < stop``.
    """
    tally = np.zeros(max_len)
    for a, b in ranges:
        # Clip to [0, max_len] so a slice counts exactly the indices i with
        # a <= i < b, without negative bounds wrapping around.
        lo = max(a, 0)
        hi = max(min(b, max_len), 0)
        tally[lo:hi] += 1
    return tally


olap = count_overlap(693, ranges)
olap[:55]   # fifty 1s followed by five 2s: ([1., 1., ..., 1., 2., 2., 2., 2., 2.])
olap[-50:]  # seven 2s followed by forty-three 1s: ([2., ..., 2., 1., 1., ..., 1.])

1条回答

网友

1楼 · 发布于 2024-05-29 06:18:57

在我自己的问题中，我提供了另一种（不那么优雅的）方法，但我并没有试图回答我自己的问题。相反，我只是简单地包装了相关的功能和解决方案，供其他人使用：

辅助函数

def shard_rng(maxlen, sublen, stride):
    """Return half-open ``[start, stop]`` windows of length ``sublen``.

    Windows start at 0, ``stride``, ``2 * stride``, ...  The first window
    that would reach or pass ``maxlen`` is replaced by the end-anchored
    window ``[maxlen - sublen, maxlen]`` and iteration stops there, so that
    window is emitted exactly once.

    Args:
        maxlen: Total number of rows to cover.
        sublen: Length of every window; must satisfy ``0 < sublen <= maxlen``.
        stride: Step between consecutive window starts; must be positive.

    Returns:
        A list of ``[start, stop]`` pairs with strictly increasing starts.
        When ``stride > sublen`` the rows between consecutive windows are
        not covered by any window.

    Raises:
        ValueError: If ``stride`` is not positive or ``sublen`` is outside
            ``(0, maxlen]``.
    """
    if stride <= 0:
        raise ValueError("stride must be positive")
    if sublen <= 0 or sublen > maxlen:
        raise ValueError("sublen must satisfy 0 < sublen <= maxlen")

    ranges = []
    for start in range(0, maxlen, stride):
        stop = start + sublen
        if stop >= maxlen:
            # Last window: anchor it to the end.  Using >= (not >) means a
            # regular window that already ends at maxlen is not followed by
            # a duplicate end-anchored copy on the next iteration.
            ranges.append([maxlen - sublen, maxlen])
            break
        ranges.append([start, stop])
    return ranges

# for testing. stitched_mat - mat should be 1
def split_mat(mat, ranges):
    """Cut ``mat`` into one window per ``(start, stop)`` pair, each offset by +1.

    Args:
        mat: Sliceable object supporting ``mat[start:stop] + 1``.
        ranges: Iterable of ``(start, stop)`` pairs.

    Returns:
        A list holding ``mat[start:stop] + 1`` for every pair, in order.
    """
    return [mat[lo:hi] + 1 for lo, hi in ranges]

# part of solution 1
def weight_rngs(ranges):
    """Return per-row averaging weights ``1 / (number of ranges covering row)``.

    A +1 is placed at every range start and a -1 at every range stop; the
    cumulative sum of that sequence gives, for each row, how many ranges
    contain it.

    Args:
        ranges: Non-empty sequence of half-open ``(start, stop)`` pairs of
            non-negative integers, in any order.

    Returns:
        A float array of shape ``(n, 1)``, where ``n`` is the largest stop,
        ready to broadcast against ``(rows, columns)`` blocks.  Rows covered
        by no range have a count of 0 and therefore a weight of ``inf``.
    """
    bounds = np.asarray(ranges)
    starts, stops = bounds[:, 0], bounds[:, 1]
    # Size by the largest stop rather than the last range's stop so that the
    # two bincounts always have equal length, even for unsorted ranges.
    n = int(stops.max())
    opened = np.bincount(starts, minlength=n + 1)
    closed = np.bincount(stops, minlength=n + 1)
    counts = (opened - closed).cumsum()
    return 1 / counts[:n, None]

解决方案

由@Paul Panzer提供

# solution 1
def stitch_mats(shape, submats, ranges):
    """Rebuild a matrix from overlapping row blocks via a weighted sum.

    Each block is scaled by the per-row weights from ``weight_rngs`` before
    being accumulated into the output.

    Args:
        shape: Shape of the output array.
        submats: Row blocks, one per entry of ``ranges``.
        ranges: ``(start, stop)`` row pairs locating each block in the output.

    Returns:
        A float array of the given shape holding the weighted sum of blocks.
    """
    row_weights = weight_rngs(ranges)
    result = np.zeros(shape)
    for (lo, hi), block in zip(ranges, submats):
        rows = slice(lo, hi)
        result[rows] += row_weights[rows] * block
    return result

# solution 2
def stitch_mats2(shape, submats, ranges):
    """Rebuild a matrix from overlapping row blocks: sum first, then divide.

    All blocks are added into the output, then every segment between two
    consecutive range boundaries is divided by the number of ranges that
    cover it.

    Args:
        shape: Shape of the output array.
        submats: Row blocks, one per entry of ``ranges``.
        ranges: Half-open ``(start, stop)`` row pairs locating each block.

    Returns:
        A float array of the given shape.  Rows covered by several blocks
        hold their average; rows covered by no block stay 0.
    """
    ranges = np.asarray(ranges)
    flat = ranges.ravel()
    # Stable sort keeps the original order among equal boundary values.
    order = flat.argsort(kind='stable')

    # In the flattened array even positions are starts and odd positions are
    # stops, so this maps starts to +1 and stops to -1; the running sum is
    # the number of ranges open after each boundary.
    counts = (1 - ((order & 1) << 1)).cumsum()

    # Allocate from the caller's shape, not from module-level globals.
    stitched = np.zeros(shape)
    # add slices
    for submat, (start, stop) in zip(submats, ranges):
        stitched[start:stop] += submat

    edges = flat[order]
    # divide by overlap
    for start, stop, count in zip(edges[:-1], edges[1:], counts[:-1]):
        # A count of 0 marks a gap between ranges; dividing would give NaN.
        if count > 0:
            stitched[start:stop] /= count
    return stitched

试验

# Problem size: mat has n rows and m columns; windows are s rows long.
n = 693
m = 10
s = 500 # sublen
stride = 50

# Random integer matrix with entries in [0, 10), used as the ground truth.
mat = np.random.randint(0,10,(n,m))
ranges = shard_rng(n, s, stride)
# split_mat adds 1 to every window, so a correct stitch equals mat + 1.
submats = split_mat(mat, ranges)


stitched_1 = stitch_mats(mat.shape, submats, ranges)
stitched_2 = stitch_mats2(mat.shape, submats, ranges)

# Distinct residuals after removing the +1 offset; all should be (close to) 0.
np.unique(stitched_1-mat-1.), np.unique(stitched_2-mat-1.)
# array([-8.8817842e-16,  0.0000000e+00,  4.4408921e-16,  8.8817842e-16]), array([0.])

网友
2楼 · 发布于 2024-05-29 06:18:57

下面是使用bincount+cumsum逐行计算重叠的方法：
更新：添加了另一个只使用切片的方法。我希望这会更快。
import numpy as np

n = 693
m = 10  # so mat has shape (693, 10)
s = 500
stride = 50
ranges = [[0, 500], [50, 550], [100, 600], [150, 650], [193, 693]]
# notice that the range (0,50) doesn't need to be averaged
k = 5  # len(ranges)

mat = np.random.randint(0, 10, (n, m))
submats = []
for a, b in ranges:
    submats.append(mat[a:b])

ranges = np.asarray(ranges)

# method 1
out = np.zeros((n, m))
# put a 1 at every start and a -1 at every stop
# then take the cumsum this will assign to each row the
# number of intervals it is in
# finally, take the reciprocal
weight = 1 / np.subtract(*map(np.bincount, ranges.T, (None, None), (n+1, n+1))).cumsum()[:n, None]
for sm, (a, b) in zip(submats, ranges):
    out[a:b] += weight[a:b] * sm

# method 2
# sort range ends
ro = ranges.ravel().argsort(kind='stable')
# put 1 for starting and -1 for ending, take cumsum
cnts = (1-((ro&1)<<1)).cumsum()
out = np.zeros((n, m))
# add slices
for sm, (a, b) in zip(submats, ranges):
    out[a:b] += sm
rs = ranges.ravel()[ro]
# divide by overlap
for a, b, c in zip(rs[:-1], rs[1:], cnts[:-1]):
    out[a:b] /= c

助手

解决方案

试验

相关问题更多 >

编程相关推荐

热门问题

热门文章