How to generate multiple epochs of data with TensorFlow's tf.train.string_input_producer?


When I want to load two epochs of data with tf.train.string_input_producer, I use

filename_queue = tf.train.string_input_producer(filenames=['data.csv'], num_epochs=2, shuffle=True)

col1_batch, col2_batch, col3_batch = tf.train.shuffle_batch([col1, col2, col3], batch_size=batch_size, capacity=capacity, min_after_dequeue=min_after_dequeue, allow_smaller_final_batch=True)

But I then found that this does not give me what I want.

It only guarantees that each sample in data.csv is produced twice; the order in which samples appear is arbitrary. For example, given 3 rows of data in data.csv:

[[1]
[2]
[3]]

it may produce something like (each sample appears exactly twice, but in an arbitrary order):

[1]
[1]
[3]
[2]
[2]
[3]

But what I want is (epochs kept separate, with shuffling within each epoch):

(epoch 1:)
[1]
[2]
[3]
(epoch 2:)
[1]
[3]
[2]

Also, how can I tell when one epoch has finished? Is there a flag variable for it? Thanks!

Here is my code:

import tensorflow as tf

def read_my_file_format(filename_queue):
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    record_defaults = [['1'], ['1'], ['1']]
    # Split each line on '-' (not ','), so the comma-separated pairs
    # stay intact as single string columns.
    col1, col2, col3 = tf.decode_csv(value, record_defaults=record_defaults, field_delim='-')
    return col1, col2, col3

def input_pipeline(filenames, batch_size, num_epochs=1):
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=True)
    col1, col2, col3 = read_my_file_format(filename_queue)

    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * batch_size
    col1_batch, col2_batch, col3_batch = tf.train.shuffle_batch(
        [col1, col2, col3], batch_size=batch_size, capacity=capacity,
        min_after_dequeue=min_after_dequeue, allow_smaller_final_batch=True)
    return col1_batch, col2_batch, col3_batch

filenames = ['1.txt']
batch_size = 3
num_epochs = 1
a1, a2, a3 = input_pipeline(filenames, batch_size, num_epochs)

with tf.Session() as sess:
    # Setting num_epochs makes string_input_producer create a local epoch
    # counter, so the local variables must be initialized.
    sess.run(tf.local_variables_initializer())
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        while not coord.should_stop():
            a, b, c = sess.run([a1, a2, a3])
            print(a, b, c)
    except tf.errors.OutOfRangeError:
        print('Done training, epoch reached')
    finally:
        coord.request_stop()

    coord.join(threads)

My data is:

1,2-3,4-A
7,8-9,10-B
12,13-14,15-C
17,18-19,20-D
22,23-24,25-E
27,28-29,30-F
32,33-34,35-G
37,38-39,40-H
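
For reference, field_delim='-' tells tf.decode_csv to split each record on '-' rather than ',', so the comma-separated pairs survive intact as single string columns. A plain-Python sketch of the expected split for the first row (illustration only, not part of the pipeline):

# How a '-'-delimited record breaks into three columns: the commas inside
# each field are untouched, so every column comes back as one string.
line = '1,2-3,4-A'
col1, col2, col3 = line.split('-')
print(col1, col2, col3)  # -> 1,2 3,4 A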

2 Answers

You might want to look at this answer to a similar question.

In short:

  • if num_epochs > 1, all the data is enqueued at once, with no regard for epoch boundaries,

  • so you have no way to monitor which epoch is currently being dequeued.

What you can do is follow the first suggestion in the referenced answer: use num_epochs == 1, and reinitialize the local queue variables (but obviously not the model variables) on each run.

init_queue = tf.variables_initializer(tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='input_producer'))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

for e in range(num_epochs):
    with tf.Session() as sess:
        sess.run(init_queue)  # reinitialize the local variables in the input_producer scope
        # start populating the filename queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            while not coord.should_stop():
                a, b, c = sess.run([a1, a2, a3])
                print(a, b, c)
        except tf.errors.OutOfRangeError:
            print('Done training, epoch reached')
        finally:
            coord.request_stop()

        coord.join(threads)
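
As an aside, the scope='input_producer' string above relies on the default name scope that tf.train.string_input_producer gives its ops; if you create the producer with an explicit name, the collection lookup must use that scope instead, e.g. with a hypothetical name 'my_queue':

init_queue = tf.variables_initializer(
    tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='my_queue'))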

As Nicolas observes, the tf.train.string_input_producer() API does not give you the ability to detect when the end of an epoch is reached; instead it concatenates all epochs into one long batch. For this reason, we recently added (in TensorFlow 1.2) the tf.contrib.data API, which makes it possible to express more complicated pipelines, including your use case.

The following code snippet shows how to write your program using tf.contrib.data:

import tensorflow as tf

def input_pipeline(filenames, batch_size):
    # Define a `tf.contrib.data.Dataset` for iterating over one epoch of the data.
    dataset = (tf.contrib.data.TextLineDataset(filenames)
               .map(lambda line: tf.decode_csv(
                    line, record_defaults=[['1'], ['1'], ['1']], field_delim='-'))
               .shuffle(buffer_size=10)  # Equivalent to min_after_dequeue=10.
               .batch(batch_size))

    # Return an *initializable* iterator over the dataset, which will allow us to
    # re-initialize it at the beginning of each epoch.
    return dataset.make_initializable_iterator() 

filenames=['1.txt']
batch_size = 3
num_epochs = 10
iterator = input_pipeline(filenames, batch_size)

# `a1`, `a2`, and `a3` represent the next element to be retrieved from the iterator.    
a1, a2, a3 = iterator.get_next()

with tf.Session() as sess:
    for _ in range(num_epochs):
        # Resets the iterator at the beginning of an epoch.
        sess.run(iterator.initializer)

        try:
            while True:
                a, b, c = sess.run([a1, a2, a3])
                print(a, b, c)
        except tf.errors.OutOfRangeError:
            # This will be raised when you reach the end of an epoch (i.e. the
            # iterator has no more elements).
            pass                 

        # Perform any end-of-epoch computation here.
        print('Done training, epoch reached')
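
As a follow-up note: in TensorFlow 1.4 and later, tf.contrib.data graduated to the core tf.data API, so the same pattern can be written without the contrib prefix. Below is a minimal sketch under that assumption, using the same '1.txt' data:

import tensorflow as tf

# Core tf.data version (TensorFlow 1.4+) of the pipeline above; a sketch,
# not a tested drop-in replacement.
dataset = (tf.data.TextLineDataset(['1.txt'])
           .map(lambda line: tf.decode_csv(
                line, record_defaults=[['1'], ['1'], ['1']], field_delim='-'))
           .shuffle(buffer_size=10)
           .batch(3))
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    for _ in range(10):  # 10 epochs; every initialization re-shuffles the data
        sess.run(iterator.initializer)
        try:
            while True:
                print(sess.run(next_element))
        except tf.errors.OutOfRangeError:
            print('Done training, epoch reached')

If you do not need to run code at the end of each epoch, an alternative is .shuffle(buffer_size=10, reshuffle_each_iteration=True).repeat(num_epochs): this yields one long stream in which every pass over the data is shuffled independently (the per-epoch shuffling asked for in the question), at the cost of not seeing the epoch boundary.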
