筛选数据集以仅从特定类中获取图像

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np

builder = tfds.builder("omniglot")
# assert builder.info.splits['train'].num_examples == 60000
builder.download_and_prepare()
# Load data from disk as tf.data.Datasets
datasets = builder.as_dataset()
dataset, test_dataset = datasets['train'], datasets['test']


def resize(example):
    """Turn one raw example into an (image, label, alphabet) triple.

    The image is resized to 28x28, converted to grayscale and divided by
    255. The label is an all-zero (51, 10) array, and the example's
    'alphabet' field is passed through so later stages can filter on it.
    """
    image = example['image']
    image = tf.image.resize(image, [28, 28])
    image = tf.image.rgb_to_grayscale(image)
    image = image / 255
    one_hot_label = np.zeros((51, 10))
    return image, one_hot_label, example['alphabet']


def stack(image, label, alphabet):
    """Regroup a batch as ((image, label), target), where target is label[-1]."""
    return (image, label), label[-1]


def filter_func(image, label, alphabet, allowed_alphabets=(2, 3, 4, 5)):
    """Return a scalar boolean tensor: True if `alphabet` is an allowed one.

    Args:
        image: Unused; present because the dataset elements are triples.
        label: Unused; present because the dataset elements are triples.
        alphabet: Scalar tensor holding the alphabet id of this example.
        allowed_alphabets: Sequence of alphabet ids to keep.
    """
    # Build the allowed set with the same dtype as `alphabet`, since
    # tf.equal requires both operands to share a dtype.
    allowed = tf.constant(list(allowed_alphabets), dtype=alphabet.dtype)
    # tf.equal broadcasts the scalar against the vector; keep the example
    # when it matches any of the allowed ids.
    return tf.reduce_any(tf.equal(alphabet, allowed))


# correct size
dataset = dataset.map(resize)
# now filter the dataset for the batch
dataset = dataset.filter(filter_func)
# infinite stream of batches (classes*samples + 1)
dataset = dataset.repeat().shuffle(1024).batch(51)
# stack the images together
dataset = dataset.map(stack)
dataset = dataset.shuffle(buffer_size=1000)
dataset = dataset.batch(32)

# NOTE: the dataset was repeated above, so this loop does not terminate on
# its own; interrupt it or add a `break` when experimenting.
for i, (image, label) in enumerate(tfds.as_numpy(dataset)):
    print(i, image[0].shape)

1条回答

网友

1楼 · 发布于 2024-04-18 10:50:50

`tf.equal` 支持广播，并允许将标量与 rank > 0 的张量进行比较。

要仅保留特定标签，请使用以下谓词：

# Work on the 'train' entry of the `datasets` mapping.
dataset = datasets['train']

def predicate(x, allowed_labels=tf.constant([0., 1., 2.])):
    """Return a scalar boolean tensor: True when x['label'] is allowed.

    Args:
        x: A dataset element (mapping) with a 'label' entry.
        allowed_labels: Tensor (or array-like) of labels to keep. Any
            numeric dtype is accepted; the label is cast to match it.
    """
    allowed = tf.convert_to_tensor(allowed_labels)
    # tf.equal needs both operands in the same dtype, so cast the label to
    # whatever dtype the allowed set uses instead of assuming float32.
    label = tf.cast(x['label'], allowed.dtype)
    # Broadcasting compares the scalar label with every allowed value; the
    # element is kept if at least one comparison is True.
    return tf.reduce_any(tf.equal(allowed, label))

# Drop every element rejected by `predicate`, then group into batches of 20.
dataset = dataset.filter(predicate)
dataset = dataset.batch(20)

# Print the labels of each batch as NumPy values.
for batch in tfds.as_numpy(dataset):
    print(batch['label'])
# [1 0 0 1 2 1 1 2 1 0 0 1 2 0 1 0 2 2 0 1]
# [1 0 2 2 0 2 1 2 1 2 2 2 0 2 0 2 1 2 1 1]
# [2 1 2 1 0 1 1 0 1 2 2 0 2 0 1 0 0 0 0 0]

`allowed_labels` 指定要保留的标签。所有不在此张量中的标签都将被过滤掉。

相关问题更多 >

编程相关推荐

热门问题

热门文章