为什么我的多处理代码停止处理大型数据集？

import multiprocessing from multiprocessing import RawArray, Pool, Lock from functools import partial import numpy as np ## Set up initial fake data Information_Gains_Matrix = np.random.uniform(0,1,(22000,22000)) Weights_Matrix = np.random.uniform(0,1,(22000,22000)) ## Function I want to parallelise. def Feature_Moran_Index(Chunks,Wij,N): Moran_Index_Scores = np.zeros(Chunks.shape[0]) for i in np.arange(Chunks.shape[0]): print(Chunks[i]) # Print ind to show it's running Feature = Information_Gains_Matrix[Chunks[i],:] X_bar = np.mean(Feature) if X_bar != 0: Deviance = Feature - X_bar Outer_Deviance = np.outer(Deviance,Deviance) Deviance2 = Deviance * Deviance Denom = np.sum(Deviance2) Moran_Index_Scores[i] = (N/Wij) * (np.sum((W * np.ndarray.flatten(Outer_Deviance)))/Denom) return Moran_Index_Scores ## Set up chunks inds for each core. Use_Cores = (multiprocessing.cpu_count()-2) Chunk_Size = np.ceil(Information_Gains_Matrix.shape[0] / Use_Cores) Range = np.arange(Information_Gains_Matrix.shape[0]).astype("i") Chunk_Range = np.arange(Chunk_Size).astype("i") Chunks = [] for i in np.arange(Use_Cores-1): Chunks.append(Range[Chunk_Range]) Range = np.delete(Range,Chunk_Range) Chunks.append(Range) if __name__ == '__main__': W = RawArray('d', Information_Gains_Matrix.shape[0] * Information_Gains_Matrix.shape[1]) W_np = np.frombuffer(W, dtype=np.float64).reshape((Information_Gains_Matrix.shape[0], Information_Gains_Matrix.shape[1])) np.copyto(W_np, Weights_Matrix) N = Information_Gains_Matrix.shape[0] Wij = np.sum(Weights_Matrix) with Pool(processes=Use_Cores) as pool: Results = pool.map(partial(Feature_Moran_Index, Wij=Wij,N=N), Chunks) Moran_Index_Score = np.concatenate(Results)

1条回答

网友

1楼 · 发布于 2024-05-23 14:27:11

在Feature_Moran_Index中，Deviance具有形状(22000,)，并且Outer_Deviance具有形状(22000, 22000)，并且使用3.8GB的RAM

数量

np.sum(W * np.ndarray.flatten(Outer_Deviance))

相等于

np.sum(W_np * Outer_Deviance)

相等于

Deviance @ W_np @ Deviance

用最后一个表达式替换第一个表达式并删除Outer_Deviance的定义后，程序运行到结束，内存使用量为c。11GB

相关问题更多 >

编程相关推荐

热门问题

热门文章