Why do multiprocessing.Pool and multiprocessing.Process behave so differently on Linux?



I ran the test code below to compare the performance of Pool and Process on Linux, using Python 2.7. The source code of multiprocessing.Pool seems to show that it is built on top of multiprocessing.Process; however, multiprocessing.Pool costs far more time and memory than multiprocessing.Process, and I don't understand why.

Here is what I did:

  1. Create a big dict and then create the subprocesses.

  2. Pass the dict to each subprocess for read-only access.

  3. Each subprocess does some calculation and returns a small result.

Here is my test code:

from multiprocessing import Pool, Process, Queue
import time, psutil, os, gc

gct = time.time
costTime = lambda ET: time.strftime('%H:%M:%S', time.gmtime(int(ET)))

def getMemConsumption():
    procId = os.getpid()
    proc = psutil.Process(procId)
    mem = proc.memory_info().rss
    return "process ID %d.\nMemory usage: %.6f GB" % (procId, mem*1.0/1024**3)

def f_pool(l, n, jobID):
    try:
        result = {}
        # example of subprocess work
        for i in xrange(n):
            result[i] = l[i]
        # work done
        # gc.collect()
        print getMemConsumption()
        return 1, result, jobID
    except:
        return 0, {}, jobID

def f_proc(q, l, n, jobID):
    try:
        result = {}
        # example of subprocess work
        for i in xrange(n):
            result[i] = l[i]
        # work done
        print getMemConsumption()
        q.put([1, result, jobID])
    except:
        q.put([0, {}, jobID])

def initialSubProc(targetFunc, procArgs, jobID):
    outQueue = Queue()
    args = [outQueue]
    args.extend(procArgs)
    args.append(jobID)
    p = Process(target = targetFunc, args = tuple(args))
    p.start()
    return p, outQueue


def track_add_Proc(procList, outQueueList, maxProcN, jobCount,
                   maxJobs, targetFunc, procArgs, joinFlag, all_results):
    if len(procList) < maxProcN:
        p, q = initialSubProc(targetFunc, procArgs, jobCount)
        outQueueList.append(q)
        procList.append(p)
        jobCount += 1
        joinFlag.append(0)
    else:
        for i in xrange(len(procList)):
            if not procList[i].is_alive() and joinFlag[i] == 0:
                procList[i].join()
                all_results.append(outQueueList[i].get())
                joinFlag[i] = 1 # in case of duplicating result of joined subprocess
                if jobCount < maxJobs:
                    p, q = initialSubProc(targetFunc, procArgs, jobCount)
                    procList[i] = p
                    outQueueList[i] = q
                    jobCount += 1
                    joinFlag[i] = 0
    return jobCount

if __name__ == '__main__':
    st = gct()
    d = {i:i**2 for i in xrange(10000000)}
    print "MainProcess create data dict\n%s" % getMemConsumption()
    print 'Time to create dict: %s\n\n' % costTime(gct()-st)

    nproc = 2
    jobs = 8
    subProcReturnDictLen = 1000
    procArgs = [d, subProcReturnDictLen]

    print "Use multiprocessing.Pool, max subprocess = %d, jobs = %d\n" % (nproc, jobs)
    st = gct()
    pool = Pool(processes = nproc)
    for i in xrange(jobs):
        procArgs.append(i)
        sp = pool.apply_async(f_pool, tuple(procArgs))
        procArgs.pop(2)
        res = sp.get()
        if res[0] == 1:
            # do something with the result
            pass
        else:
            # do something with subprocess exception handle
            pass
    pool.close()
    pool.join()
    print "Total time used to finish all jobs: %s" % costTime(gct()-st)
    print "Main Process\n", getMemConsumption(), '\n'

    print "Use multiprocessing.Process, max subprocess = %d, jobs = %d\n" % (nproc, jobs)
    st = gct()
    procList = []
    outQueueList = []
    all_results = []
    jobCount = 0
    joinFlag = []
    while (jobCount < jobs):
        jobCount = track_add_Proc(procList, outQueueList, nproc, jobCount, 
                                  jobs, f_proc, procArgs, joinFlag, all_results)
    for i in xrange(nproc):
        if joinFlag[i] == 0:
            procList[i].join()
            all_results.append(outQueueList[i].get())
            joinFlag[i] = 1
    for i in xrange(jobs):
        res = all_results[i]
        if res[0] == 1:
            # do something with the result
            pass
        else:
            # do something with subprocess exception handle
            pass
    print "Total time used to finish all jobs: %s" % costTime(gct()-st)
    print "Main Process\n", getMemConsumption()

Here are the results:

[program output: per-subprocess memory usage and total time for the Pool run and the Process run]

I don't know why the subprocesses started by multiprocessing.Pool need about 1.6 GB at the beginning, while the subprocesses started by multiprocessing.Process need only 0.84 GB, roughly the same as the memory cost of the main process. It looks to me as if multiprocessing.Process gets the benefit of Linux's copy-on-write, since all the jobs finish in under 1 second, and I don't understand why multiprocessing.Pool does not. Judging from the source code, multiprocessing.Pool seems to be just a wrapper around multiprocessing.Process.
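
As a rough sanity check of how much data would have to be copied if the dict were serialized for each job, one can measure the pickled size of the dict. This is a minimal sketch, separate from the test code above, and it assumes (without verifying here) that Pool pickles the arguments of every apply_async call:

# Rough check: how big is the dict once pickled?
# Assumption: Pool serializes the arguments of each apply_async call to
# ship them to a worker, so this is roughly what every job would carry.
import cPickle as pickle

d = {i: i**2 for i in xrange(10000000)}
blob = pickle.dumps(d, pickle.HIGHEST_PROTOCOL)
print "pickled dict size: %.1f MB" % (len(blob) / 1024.0 ** 2)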


1 Answer

Question: I don't know why subprocesses from multiprocessing.Pool need about 1.6GB in the beginning,
... Pool seems like a wrapper of multiprocessing.Process

This is because Pool reserves memory to hold the results of all jobs.
Second, Pool uses two SimpleQueue()s and three Threads.
Third, Pool copies all of the passed argv data before handing it over to a process.

Your Process example uses only one Queue() for everything and passes argv as-is.

Pool is far from being only a wrapper.
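
Given the third point, one common workaround (a sketch, not part of the original post) is to hand the dict to the workers once through the Pool initializer instead of passing it with every apply_async call. On Linux the workers are forked, so they inherit the dict via copy-on-write rather than receiving a pickled copy per job. The names init_worker and f_pool_shared below are made up for illustration:

from multiprocessing import Pool

shared_d = None  # set once per worker by the initializer

def init_worker(d):
    # Runs once in each worker right after it is forked; on Linux the dict
    # is inherited via copy-on-write rather than pickled for every job.
    global shared_d
    shared_d = d

def f_pool_shared(n, jobID):
    # Same work as f_pool above, but reads the dict from the worker global,
    # so apply_async only has to ship two small arguments.
    result = {}
    for i in xrange(n):
        result[i] = shared_d[i]
    return 1, result, jobID

if __name__ == '__main__':
    d = {i: i**2 for i in xrange(10000000)}
    pool = Pool(processes=2, initializer=init_worker, initargs=(d,))
    async_results = [pool.apply_async(f_pool_shared, (1000, i)) for i in xrange(8)]
    pool.close()
    pool.join()
    for r in async_results:
        print r.get()[2]   # jobID of each finished job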
