读取以numpy数组形式存储图像的hdf5文件，最有效的方法是什么？

import h5py
import io
import os
import cv2
import numpy as np
from PIL import Image


def convertJpgtoH5(input_dir, filename, output_dir):
    """Copy the raw (still encoded) bytes of one image file into its own HDF5 file.

    The image is not decoded: the file content is stored unchanged in a
    dataset named 'image', as a 1-D uint8 array. The HDF5 file is written to
    ``output_dir`` under the image's base name with a '.hdf5' extension.

    Args:
        input_dir: Directory containing the image file.
        filename: Name of the image file inside ``input_dir``.
        output_dir: Directory that receives the HDF5 file.
    """
    filepath = input_dir + '/' + filename
    print('image size: %d bytes'%os.path.getsize(filepath))
    # Context manager guarantees the image file handle is released.
    with open(filepath, 'rb') as img_f:
        binary_data = img_f.read()
    # A uint8 view keeps every byte as data. Wrapping the bytes object with
    # np.asarray() would create a 0-d byte-string ('S') array, a dtype in
    # which trailing NUL bytes are treated as padding.
    binary_data_np = np.frombuffer(binary_data, dtype=np.uint8)
    # splitext copes with extensions of any length ('.jpeg', '.tiff', ...),
    # unlike slicing off a fixed four characters.
    new_filepath = output_dir + '/' + os.path.splitext(filename)[0] + '.hdf5'
    with h5py.File(new_filepath, 'w') as f:
        f.create_dataset('image', data=binary_data_np)
    print('hdf5 file size: %d bytes'%os.path.getsize(new_filepath))


pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5/files'
ext = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]

# Create one HDF5 file per image found directly inside pathImg.
for img in os.listdir(pathImg):
    if img.endswith(tuple(ext)):
        convertJpgtoH5(pathImg, img, pathH5)

# Read every per-image HDF5 file back and decode the stored image bytes.
for h5name in os.listdir(pathH5):
    if h5name.endswith(".hdf5"):
        # Separate names for the file name and the open handle; the context
        # manager closes the file even if reading raises.
        with h5py.File(f"{pathH5}/{h5name}", "r") as hf:
            key = list(hf.keys())[0]
            # Copy the dataset into memory so it outlives the open file.
            data = np.array(hf[key])
        img = Image.open(io.BytesIO(data))
        # NOTE(review): this call only swaps the first and third channels.
        # PIL normally decodes colour images as RGB, so the result would be
        # BGR-ordered - confirm that is what downstream code expects.
        image = cv2.cvtColor(np.float32(img), cv2.COLOR_BGR2RGB)

1条回答

网友

1楼 · 发布于 2024-04-27 03:53:36

理想情况下，这个问题应该作为重复问题关闭，因为我在上面的评论中引用的答案已经解释了您想要做的大部分事情。我在这里列出这些链接：

有一个区别：我的示例将所有图像数据加载到一个HDF5文件中，而您是为每个图像创建一个HDF5文件。坦率地说，我认为这样做没有多大价值。您最终得到的文件数量是原来的两倍，却一无所获。如果您仍然有兴趣这样做，这里还有两个答案可能会有所帮助（最后我也更新了您的代码）：

为了解决您的具体问题，我修改了您的代码，只使用cv2（不需要PIL）。我调整了图像的大小，并将所有图像保存为1个文件中的1个数据集。如果您正在使用这些图像来训练和测试CNN模型，您无论如何都需要这样做（它需要大小/形状一致的数组）。另外，我认为您可以将数据保存为8位整数（下面的代码使用uint8），而不需要浮点数。见下文：

import h5py
import glob
import os
import cv2
import numpy as np

def convertImagetoH5(imgfilename):
    """Load one image as RGB and resize it to IMG_WIDTH x IMG_HEIGHT.

    Args:
        imgfilename: Path of the image file to load.

    Returns:
        The resized image as a NumPy array. cv2 lays images out as
        (height, width, channels), so the shape is
        (IMG_HEIGHT, IMG_WIDTH, 3).

    Raises:
        ValueError: If cv2 cannot read or decode the file.
    """
    print('image size: %d bytes'%os.path.getsize(imgfilename))
    # The second argument of imread is a read-mode flag, not a colour
    # conversion code. IMREAD_COLOR always yields a 3-channel BGR image.
    img = cv2.imread(imgfilename, cv2.IMREAD_COLOR)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError('could not read image file: %s' % imgfilename)
    # The BGR -> RGB conversion has to be an explicit, separate step.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.resize takes the target size as (width, height).
    img_resize = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
    return img_resize


pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5file'
ext_list = [".ppm", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]
IMG_WIDTH = 120
IMG_HEIGHT = 120

# Get list of all images (files directly inside pathImg) and number of images.
# The pattern has no "**" component, so glob's recursive option would have
# no effect and is left out.
all_images = []
for ext in ext_list:
    all_images.extend(glob.glob(pathImg+"/*"+ext))
n_images = len(all_images)

# cv2.resize returns arrays shaped (height, width, channels), so the height
# axis must come before the width axis; otherwise non-square target sizes
# fail on assignment.
ds_img_arr = np.zeros((n_images, IMG_HEIGHT, IMG_WIDTH, 3), dtype=np.uint8)

# Fill one row of the stack per image.
for cnt, img in enumerate(all_images):
    ds_img_arr[cnt] = convertImagetoH5(img)

# Write the whole stack as a single dataset in a single HDF5 file.
h5_filepath = pathH5 + '/all_image_data.hdf5'
with h5py.File(h5_filepath, 'w') as h5f:
    h5f.create_dataset('images', data=ds_img_arr)

print('hdf5 file size: %d bytes'%os.path.getsize(h5_filepath))

# Re-open the file read-only and report the dataset's name, shape and dtype.
with h5py.File(h5_filepath, "r") as h5r:
    key = list(h5r.keys())[0]
    print (key, h5r[key].shape, h5r[key].dtype)

如果您真的希望每个图像对应一个HDF5文件，下面是对您问题中代码的更新版本。同样，仅使用cv2，无需PIL。图像不会调整大小。这只是为了完整起见（演示流程），并不是管理图像数据的推荐方式。

import h5py
import os
import cv2
import numpy as np

def convertImagetoH5(input_dir, filename, output_dir):
    """Decode one image as RGB and store the pixel array in its own HDF5 file.

    The image is not resized. The array is written to a dataset named
    'image' in ``output_dir``, in a file named after the image's base name
    with a '.hdf5' extension.

    Args:
        input_dir: Directory containing the image file.
        filename: Name of the image file inside ``input_dir``.
        output_dir: Directory that receives the HDF5 file.

    Raises:
        ValueError: If cv2 cannot read or decode the file.
    """
    filepath = input_dir + '/' + filename
    print('image size: %d bytes'%os.path.getsize(filepath))
    # The second argument of imread is a read-mode flag, not a colour
    # conversion code. IMREAD_COLOR always yields a 3-channel BGR image.
    img = cv2.imread(filepath, cv2.IMREAD_COLOR)
    if img is None:
        # imread signals failure by returning None instead of raising.
        raise ValueError('could not read image file: %s' % filepath)
    # The BGR -> RGB conversion has to be an explicit, separate step.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # splitext copes with extensions of any length ('.jpeg', '.tiff', ...),
    # unlike slicing off a fixed four characters.
    new_filepath = output_dir + '/' + os.path.splitext(filename)[0] + '.hdf5'
    with h5py.File(new_filepath, 'w') as h5f:
        h5f.create_dataset('image', data=img)
    print('hdf5 file size: %d bytes'%os.path.getsize(new_filepath))

pathImg = '/path/to/images'
pathH5 = '/path/to/hdf5file'
ext = [".ppm", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]

# Loop thru image files and create a matching HDF5 file
for img in os.listdir(pathImg):
    if img.endswith(tuple(ext)):
        convertImagetoH5(pathImg, img, pathH5)

# Loop thru HDF5 files and read image dataset (as an array)
for h5name in os.listdir(pathH5):
    if h5name.endswith(".hdf5"):
        # A with-statement binds its target through "as" only; the extra
        # "h5f =" assignment in the header was a syntax error.
        with h5py.File(f"{pathH5}/{h5name}", "r") as h5f:
            key = list(h5f.keys())[0]
            # [:] reads the whole dataset into an in-memory NumPy array.
            image = h5f[key][:]
            print(f'{h5name}: {image.shape}, {image.dtype}')

相关问题更多 >

编程相关推荐

热门问题

热门文章