是否可以在不定义数组的情况下创建numpy文件？

def create_empty_numpy_file(filename, shape, dtype=np.float64):
    """Write a ``.npy`` file of the given shape without building the array in RAM.

    A temporary file is memory-mapped with the requested ``shape`` and
    ``dtype`` and that mapping is handed to ``np.save``, so the array
    contents are backed by the temporary file instead of by an in-memory
    allocation.

    Args:
        filename: Destination path (or file object) passed to ``np.save``.
        shape: Shape of the array to describe in the file.
        dtype: Element type of the array; defaults to ``np.float64``.
    """
    with tempfile.TemporaryFile() as tmp:
        memmap = np.memmap(tmp, dtype, mode="w+", shape=shape)
        try:
            np.save(filename, memmap)
        finally:
            # Release our reference to the mapping before the temporary file
            # is closed, so the file is not torn down while still mapped.
            del memmap

class MockFlags:
    """Minimal stand-in for ``ndarray.flags`` carrying only contiguity flags.

    Args:
        shape: Shape of the array being mocked.
        c_contiguous: Whether the mocked array is laid out in C order.
    """

    def __init__(self, shape, c_contiguous=True):
        self.c_contiguous = c_contiguous
        # A one-dimensional C-ordered array is also Fortran-contiguous;
        # anything that is not C-ordered is reported as Fortran-ordered.
        self.f_contiguous = (not c_contiguous) or len(shape) == 1


class MockArray:
    """Array-like object exposing only ``shape``, ``dtype`` and ``flags``.

    It carries just enough metadata for
    ``np.lib.format.header_data_from_array_1_0`` to build an ``.npy`` header,
    so a zero-filled file can be written without allocating the array.

    Args:
        shape: Tuple of dimension sizes.
        dtype: Anything accepted by ``np.dtype``; defaults to ``np.float64``.
        c_contiguous: ``True`` for C order, ``False`` for Fortran order.
    """

    def __init__(self, shape, dtype=np.float64, c_contiguous=True):
        self.shape = shape
        self.dtype = np.dtype(dtype)
        self.flags = MockFlags(shape, c_contiguous)

    def save(self, filename):
        """Write an ``.npy`` file whose data section consists of zero bytes.

        Args:
            filename: Path of the file to create (opened in ``"wb"`` mode).
        """
        # Use plain Python integers: no overflow for huge shapes, an integer
        # result for a 0-d shape, and no reliance on ``np.product`` (removed
        # in NumPy 2.0).
        n_items = 1
        for dim in self.shape:
            n_items *= int(dim)
        total_bytes = n_items * self.dtype.itemsize

        # Write in 16 MiB chunks to hide the Python loop overhead.
        chunk_size = 16 * 1024 ** 2
        n_chunks, remainder = divmod(total_bytes, chunk_size)

        with open(filename, "wb") as f:
            np.lib.format.write_array_header_2_0(
                f, np.lib.format.header_data_from_array_1_0(self)
            )
            if n_chunks:
                # Allocate the zero buffer once and reuse it for every chunk.
                zeros = bytes(chunk_size)
                for _ in range(n_chunks):
                    f.write(zeros)
            f.write(bytes(remainder))

1条回答

网友

1楼 · 发布于 2024-05-14 03:14:28

NumPy 的文件格式其实非常简单（really simple）。您可以借助一些文档较少的函数，仅根据构建数组所需的元数据生成文件头字节，而无需实际构建数组。

import numpy as np

def create_npy_header_bytes(
    shape, dtype=np.float64, fortran_order=False, format_version="2.0"
):
    """Build the raw header bytes of an ``.npy`` file from array metadata.

    The result is the magic string, the two version bytes, the little-endian
    header length and the space-padded, newline-terminated header dict. Its
    total length is always a multiple of 64 bytes.

    Args:
        shape: Array shape as a sequence of integers (a single integer is
            treated as a one-dimensional shape).
        dtype: Anything accepted by ``np.dtype``; defaults to ``np.float64``.
        fortran_order: Whether the data is stored in Fortran order.
        format_version: File format version as a ``"major.minor"`` string.

    Returns:
        The header as a ``bytes`` object.

    Raises:
        ValueError: If ``format_version`` is not of the form ``"major.minor"``.
        OverflowError: If the header does not fit into the length field of
            the requested format version.
    """
    major, minor = (int(part) for part in format_version.split("."))
    # Only version 1.x stores the header length in 2 bytes; later versions
    # use a 4-byte unsigned integer.
    n_size_bytes = 2 if major == 1 else 4
    magic = b"\x93NUMPY"

    # Normalise to a tuple of plain ints and a plain bool so that the textual
    # form is a literal the reader can parse (lists and NumPy scalars are not).
    if isinstance(shape, (int, np.integer)):
        shape = (shape,)
    shape = tuple(int(dim) for dim in shape)

    # Keys are supposed to be alphabetically sorted
    header = {
        "descr": np.lib.format.dtype_to_descr(np.dtype(dtype)),
        "fortran_order": bool(fortran_order),
        "shape": shape,
    }

    # Versions 1.x and 2.x encode the header as latin1, version 3.x as UTF-8.
    header_bytes = str(header).encode("utf8" if major >= 3 else "latin1")

    # Pad with spaces so that magic + version + length field + header dict +
    # terminating newline together are a multiple of 64 bytes. The newline is
    # counted before rounding, otherwise an already aligned prefix would end
    # up one byte past the boundary.
    unpadded_length = len(magic) + 2 + n_size_bytes + len(header_bytes) + 1
    padding = -unpadded_length % 64
    header_bytes += b" " * padding + b"\n"

    # Length of the header dict, including padding and newline
    length = len(header_bytes).to_bytes(n_size_bytes, "little")

    return b"".join((magic, bytes((major, minor)), length, header_bytes))

您可以用下面的代码段验证它的输出是否与库函数生成的文件头等效：

import numpy as np
import io
# Real array used only as the reference for the library-generated header.
x = np.zeros((10, 3, 4))

# Header built by the hand-written helper from the shape alone.
first = create_npy_header_bytes(x.shape)

# Header built by NumPy itself, captured in an in-memory stream.
header_fields = np.lib.format.header_data_from_array_1_0(x)
stream = io.BytesIO()
np.lib.format.write_array_header_2_0(stream, header_fields)
library_bytes = stream.getvalue()

# Print both so they can be compared by eye.
print(f"Library: {library_bytes}")
print(f"Custom: {first}")

您应该看到如下内容：

Library: b"\x93NUMPY\x02\x00t\x00\x00\x00{'descr': '<f8', 'fortran_order': False, 'shape': (10, 3, 4), }                                                    \n"
Custom: b"\x93NUMPY\x02\x00t\x00\x00\x00{'descr': '<f8', 'fortran_order': False, 'shape': (10, 3, 4)}                                                      \n"

除了文件头 dict 表示中的尾随逗号（以及相应少掉的填充空格）之外，两者完全一致。这一差异无关紧要：带尾随逗号的写法同样是 dict 的有效 Python 字面量表示，解析时会直接忽略这个逗号。

另一种做法是模拟（mock）一个对象，让它只具备库函数生成文件头时所需的字段。对于 np.lib.format.header_data_from_array_1_0，所需的字段似乎是 .flags（必须带有 c_contiguous 和/或 f_contiguous 字段）、.dtype 以及 .shape。这种做法实际上要简单得多，代码如下：

import numpy as np
import io

class MockFlags:
    """Bare-bones replacement for ``ndarray.flags`` with contiguity only.

    Args:
        shape: Shape of the array being imitated.
        c_contiguous: Whether the imitated array is in C order.
    """

    def __init__(self, shape, c_contiguous=True):
        if c_contiguous:
            # C order is also Fortran order only for one-dimensional arrays.
            fortran_layout = len(shape) == 1
        else:
            fortran_layout = True
        self.f_contiguous = fortran_layout
        self.c_contiguous = c_contiguous

class MockArray:
    """Array look-alike holding only ``shape``, ``dtype`` and ``flags``.

    Args:
        shape: Shape to report.
        dtype: Anything accepted by ``np.dtype``; defaults to ``np.float64``.
        c_contiguous: Forwarded to ``MockFlags`` to choose the memory order.
    """

    def __init__(self, shape, dtype=np.float64, c_contiguous=True):
        contiguity = MockFlags(shape, c_contiguous)
        element_type = np.dtype(dtype)
        self.flags = contiguity
        self.dtype = element_type
        self.shape = shape

# Metadata-only object; no array data is allocated.
mock = MockArray((10, 3, 4))

# Let NumPy derive the header fields from the mock and write the header
# into an in-memory stream.
header_fields = np.lib.format.header_data_from_array_1_0(mock)
stream = io.BytesIO()
np.lib.format.write_array_header_2_0(stream, header_fields)

print(stream.getvalue())

你应该看到：

b"\x93NUMPY\x02\x00t\x00\x00\x00{'descr': '<f8', 'fortran_order': False, 'shape': (10, 3, 4), }                                                    \n"

这与上面库函数的输出完全一致，而且不必自己去做计算字节数、填充之类的繁琐工作。这样更好 :)

相关问题更多 >

编程相关推荐

热门问题

热门文章