多维数组的就地洗牌

def shuffle1D(np.ndarray[double, ndim=1] x):
    """Shuffle the non-NaN entries of ``x`` in place (Fisher-Yates).

    NaN entries keep their positions; the remaining values are permuted
    among the non-NaN slots only.

    Parameters
    ----------
    x : 1-D float64 ndarray
        Array to shuffle. It is modified in place; nothing is returned.
    """
    # np.where yields platform-sized (intp) indices, so the buffer is declared
    # as intp rather than C long, which is only 32 bits on 64-bit Windows.
    cdef np.ndarray[np.intp_t, ndim=1] idx = np.where(~np.isnan(x))[0]
    # Signed counters: with an unsigned type, ``len(idx) - 1`` could wrap
    # around to a huge value when ``x`` contains no non-NaN entry.
    cdef Py_ssize_t i, j, n, m
    randint = np.random.randint
    for i in range(idx.shape[0] - 1, 0, -1):
        # randint(i + 1) draws from [0, i], as Fisher-Yates requires.
        j = randint(i + 1)
        n, m = idx[i], idx[j]
        x[n], x[m] = x[m], x[n]

2条回答

网友

1楼 · 编辑于 2024-05-29 02:50:08

下面的算法是基于切片的，其中不进行复制，它应该适用于任何np.ndarray。主要步骤是：

np.ndindex()用于遍历不同的多维索引，不包括属于要随机移动的轴的索引
然后对每个一维切片应用你已经为一维情况开发的洗牌算法。

代码：

def shuffleND(np.ndarray x, axis=-1):
    """Shuffle ``x`` in place along ``axis``, keeping NaN entries where they are.

    Every 1-D lane of ``x`` running along ``axis`` is shuffled independently
    with Fisher-Yates; only the non-NaN values of a lane are permuted.

    Parameters
    ----------
    x : ndarray
        Array to shuffle. It is modified in place.
    axis : int, optional
        Axis along which to shuffle. Negative values count from the last
        axis. Defaults to the last axis.

    Returns
    -------
    ndarray
        The same array object ``x``.
    """
    # np.where yields intp indices; signed counters avoid wrap-around when a
    # lane has no non-NaN entry (``idx.shape[0] - 1 == -1``).
    cdef np.ndarray[np.intp_t, ndim=1] idx
    cdef Py_ssize_t i, j, n, m
    randint = np.random.randint
    # Normalise every negative axis, not just -1: list.insert() with a
    # negative index on the shorter index list would land one slot off.
    if axis < 0:
        axis += x.ndim
    outer_shape = list(np.shape(x))
    outer_shape.pop(axis)
    for outer in np.ndindex(*outer_shape):
        lane_index = list(outer)
        lane_index.insert(axis, slice(None))
        # Basic indexing (integers plus one full slice) gives a 1-D view, so
        # swaps performed on ``lane`` write straight through to ``x``.
        lane = x[tuple(lane_index)]
        idx = np.where(~np.isnan(lane))[0]
        for i in range(idx.shape[0] - 1, 0, -1):
            j = randint(i + 1)
            n, m = idx[i], idx[j]
            lane[n], lane[m] = lane[m], lane[n]
    return x

网友

2楼 · 编辑于 2024-05-29 02:50:08

感谢@Veedrac的评论，这个答案使用了更多Cython功能。

指针数组沿axis存储值的内存地址
使用的仍是您的算法，只是加了一处修改：检查NaN值，防止它们被交换（NaN保持在原位）
它不会为C有序数组创建副本。如果是Fortran有序数组，ravel()命令将返回一个副本。这可以通过创建另一个指向x中各个值的double指针数组来改进，但可能会有一些缓存方面的开销……

这个代码比基于切片的另一个代码至少快一个数量级。

from libc.stdlib cimport malloc, free

cimport numpy as np
import numpy as np
from numpy.random import randint

# Declare NumPy's C-level npy_isnan from npy_math.h so it can be called
# directly on a C double, without a Python-level np.isnan call.
cdef extern from "numpy/npy_math.h":
    bint npy_isnan(double x)  # takes a C double, result is used as a C boolean

def shuffleND(x, int axis=-1):
    """Shuffle ``x`` in place along ``axis``, keeping NaN entries where they are.

    For every 1-D lane along ``axis`` a table of pointers to the lane's
    elements is built, and a Fisher-Yates pass swaps the pointed-to values.
    A swap is skipped whenever either element is NaN, so NaN entries never
    move.

    Parameters
    ----------
    x : float64 ndarray
        Array to shuffle. It is modified in place. C-contiguous input is
        shuffled without copying; any other layout is shuffled through a
        temporary C-contiguous copy that is written back into ``x``.
    axis : int, optional
        Axis along which to shuffle. Negative values count from the last
        axis. Defaults to the last axis.

    Returns
    -------
    ndarray
        The same array object ``x``.

    Raises
    ------
    MemoryError
        If the pointer table cannot be allocated.
    ValueError
        If the data is not float64 (buffer type mismatch on the flat view).
    """
    cdef np.ndarray[double, ndim=1] v  # flat view of the working array
    cdef Py_ssize_t i, j
    # Py_ssize_t rather than int: element offsets can exceed 32 bits.
    cdef Py_ssize_t num_axis, pos, stride
    cdef double tmp
    cdef double **v_axis

    if axis < 0:
        axis += x.ndim

    shape = list(x.shape)
    num_axis = shape.pop(axis)
    if num_axis < 2:
        # Nothing to permute; this also avoids a zero-byte malloc.
        return x

    # ravel() only returns a view for C-contiguous data. For any other layout
    # the element strides of ``x`` would not describe the flattened copy, so
    # shuffle an explicit C-contiguous copy and write it back afterwards.
    if x.flags.c_contiguous:
        work = x
    else:
        work = np.ascontiguousarray(x)

    # Only the pointer table itself is allocated: its entries are set to
    # addresses inside ``v`` below, so they need no storage of their own.
    v_axis = <double **>malloc(num_axis * sizeof(double *))
    if v_axis == NULL:
        raise MemoryError()

    try:
        outer_strides = [s // work.itemsize for s in work.strides]
        stride = outer_strides.pop(axis)
        v = work.ravel()
        for indices in np.ndindex(*shape):
            # Flat offset of the first element of this lane.
            pos = 0
            for step, k in zip(outer_strides, indices):
                pos += step * k
            for i in range(num_axis):
                v_axis[i] = &v[pos + i * stride]
            for i in range(num_axis - 1, 0, -1):
                j = randint(i + 1)
                if npy_isnan(v_axis[i][0]) or npy_isnan(v_axis[j][0]):
                    continue
                tmp = v_axis[i][0]
                v_axis[i][0] = v_axis[j][0]
                v_axis[j][0] = tmp
    finally:
        free(v_axis)

    if work is not x:
        x[...] = work
    return x

相关问题更多 >

编程相关推荐

热门问题

热门文章

多维数组的就地洗牌

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >