如何在两个轴上重复数组元素?
我想要在数组的第0轴和第1轴上分别重复元素M次和N次:
import numpy as np
a = np.arange(12).reshape(3, 4)
b = a.repeat(2, 0).repeat(2, 1)
print(b)
[[ 0 0 1 1 2 2 3 3]
[ 0 0 1 1 2 2 3 3]
[ 4 4 5 5 6 6 7 7]
[ 4 4 5 5 6 6 7 7]
[ 8 8 9 9 10 10 11 11]
[ 8 8 9 9 10 10 11 11]]
这个方法可以实现,但我想知道有没有更好的方法,不需要创建一个临时数组。
5 个回答
6
因为这个结果不能作为一个视图来实现,所以as_strided
并没有比简单的预分配和广播更有优势。实际上,由于它的额外开销,as_strided
似乎反而有点慢(不过我没有进行正式的性能测试)。
这个as_strided
的代码来自于@AlexRiley的帖子。
from numpy.lib.stride_tricks import as_strided
import numpy as np
def tile_array(a, b0, b1):
r, c = a.shape # number of rows/columns
rs, cs = a.strides # row/column strides
x = as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0)) # view a as larger 4D array
return x.reshape(r*b0, c*b1) # create new 2D array
def tile_array_pp(a, b0, b1):
r, c = a.shape
out = np.empty((r, b0, c, b1), a.dtype)
out[...] = a[:, None, :, None]
return out.reshape(r*b0, c*b1)
a = np.arange(9).reshape(3, 3)
kwds = {'globals': {'f_ar': tile_array, 'f_pp': tile_array_pp, 'a': a},
'number': 1000}
from timeit import timeit
print('as_strided', timeit('f_ar(a, 100, 100)', **kwds))
print('broadcast ', timeit('f_pp(a, 100, 100)', **kwds))
示例运行:
as_strided 0.048387714981799945
broadcast 0.04324757700669579
11
你可以在这里使用 np.broadcast_to
:
def broadcast_tile(a, h, w):
x, y = a.shape
m, n = x * h, y * w
return np.broadcast_to(
a.reshape(x, 1, y, 1), (x, h, y, w)
).reshape(m, n)
broadcast_tile(a, 2, 2)
array([[ 0, 0, 1, 1, 2, 2, 3, 3],
[ 0, 0, 1, 1, 2, 2, 3, 3],
[ 4, 4, 5, 5, 6, 6, 7, 7],
[ 4, 4, 5, 5, 6, 6, 7, 7],
[ 8, 8, 9, 9, 10, 10, 11, 11],
[ 8, 8, 9, 9, 10, 10, 11, 11]])
性能
函数
def chris(a, h, w):
x, y = a.shape
m, n = x * h, y * w
return np.broadcast_to(
a.reshape(x, 1, y, 1), (x, h, y, w)
).reshape(m, n)
def alex_riley(a, b0, b1):
r, c = a.shape
rs, cs = a.strides
x = np.lib.stride_tricks.as_strided(a, (r, b0, c, b1), (rs, 0, cs, 0))
return x.reshape(r*b0, c*b1)
def paul_panzer(a, b0, b1):
r, c = a.shape
out = np.empty((r, b0, c, b1), a.dtype)
out[...] = a[:, None, :, None]
return out.reshape(r*b0, c*b1)
def wim(a, h, w):
return np.kron(a, np.ones((h,w), dtype=a.dtype))
设置
import numpy as np
import pandas as pd
from timeit import timeit
res = pd.DataFrame(
index=['chris', 'alex_riley', 'paul_panzer', 'wim'],
columns=[5, 10, 20, 50, 100, 500, 1000],
dtype=float
)
a = np.arange(100).reshape((10,10))
for f in res.index:
for c in res.columns:
h = w = c
stmt = '{}(a, h, w)'.format(f)
setp = 'from __main__ import h, w, a, {}'.format(f)
res.at[f, c] = timeit(stmt, setp, number=50)
输出
19
你可以使用克罗内克积,具体可以查看numpy.kron
的文档:
>>> a = np.arange(12).reshape(3,4)
>>> print(np.kron(a, np.ones((2,2), dtype=a.dtype)))
[[ 0 0 1 1 2 2 3 3]
[ 0 0 1 1 2 2 3 3]
[ 4 4 5 5 6 6 7 7]
[ 4 4 5 5 6 6 7 7]
[ 8 8 9 9 10 10 11 11]
[ 8 8 9 9 10 10 11 11]]
不过,你原来的方法也没问题哦!