使用Dask将中间产物和结果写入文件

# Question snippet: build one intermediate bag per processing parameter,
# combine the intermediates into a Cartesian product, derive the final result,
# and write every intermediate as well as the result to text files.
import dask.bag as db
# NOTE(review): the star import presumably supplies process,
# parse_mapping_parameters, calc_result, file1 and file2 -- confirm and
# replace with explicit imports.
from other_functions import *

lines = db.read_text(file1)
processing_parameter = parse_mapping_parameters(file2)

# Bind p as a default argument: bags are lazy, so a plain closure would see
# only the final value of the loop variable when the graph finally runs.
intermediates = [
    lines.map(lambda x, p=p: process(x, p)) for p in processing_parameter
]
if not intermediates:
    raise ValueError('parse_mapping_parameters returned no parameters')

# Unpack instead of pop(0) so the list stays intact and every intermediate
# (including the first) is still written below with a matching index.
first, *rest = intermediates
products = first
for intermediate in rest:
    products = products.product(intermediate)

result = products.map(calc_result)

# to_textfiles computes immediately by default, so each call below triggers
# its own computation of the graph it depends on.
for i, intermediate in enumerate(intermediates):
    intermediate.to_textfiles(f'./data/intermediate_{i}.*.txt')
result.to_textfiles(f'./data/result.*.txt')

1条回答

网友

1楼 · 发布于 2024-05-14 16:58:02

最终以这种方式解决了这个问题：

# Answer snippet: schedule the writes of every intermediate bag and of the
# final result lazily, then execute all of them in a single dask.compute call.
import dask  # needed for dask.compute; "import dask.bag as db" binds only db
import dask.bag as db
# NOTE(review): the star import presumably supplies process,
# parse_mapping_parameters, calc_result, file1 and file2 -- confirm and
# replace with explicit imports.
from other_functions import *

lines = db.read_text(file1)
processing_parameter = parse_mapping_parameters(file2)

to_compute = []
intermediates = []
for i, p in enumerate(processing_parameter):
    # Bind p as a default argument: the bag is lazy, so a plain closure would
    # see only the last value of p by the time the graph is executed.
    intermediate = lines.map(lambda x, p=p: process(x, p))
    # compute=False returns the pending write tasks instead of writing now.
    to_compute.append(
        intermediate.to_textfiles(
            f'./data/intermediate_{i}.*.txt', compute=False
        )
    )
    intermediates.append(intermediate)

if not intermediates:
    raise ValueError('parse_mapping_parameters returned no parameters')

# Fold the intermediates into one Cartesian product, left to right.
first, *rest = intermediates
products = first
for intermediate in rest:
    products = products.product(intermediate)

result = products.map(calc_result)

to_compute.append(result.to_textfiles(f'./data/result.*.txt', compute=False))
# Run every scheduled write in one pass over the combined task graph.
dask.compute(*to_compute)

之前没有意识到 to_textfiles 方法有一个 compute 参数：设为 compute=False 时，它不会立即写文件，而是返回惰性的写入任务，之后可以用 dask.compute 一次性执行。一旦发现了这一点，解决办法就显而易见了。不过，我不确定这样做的效率如何。

相关问题更多 >

编程相关推荐

热门问题

热门文章