如何跟踪multiprocessing进程池（Pool）返回的异步结果

import multiprocessing
from multiprocessing import Pool
import os
import signal
import time
import inspect

# ``raw_input`` exists only on Python 2; fall back to ``input`` so that the
# demonstration prompts also work on Python 3.
try:
    _readLine = raw_input
except NameError:
    _readLine = input


def multiply(multiplicand1=0, multiplicand2=0):
    """Return the product of the two multiplicands."""
    return multiplicand1 * multiplicand2


def workFunctionTest(**kwargs):
    """Sleep for three seconds, then return the keyword arguments as a dict."""
    time.sleep(3)
    return kwargs


def printHR(object):
    """
    Print the specified object in a human readable way.

    A dictionary is printed as one "key: value" line per item, sorted by
    key. A list or tuple is printed one element per line. Any other object
    is printed as-is.
    """
    # dictionary
    if isinstance(object, dict):
        for key, value in sorted(object.items()):
            print(u'{a1}: {a2}'.format(a1=key, a2=value))
    # list or tuple
    elif isinstance(object, (list, tuple)):
        for element in object:
            print(element)
    # other
    else:
        print(object)


class _Describable(object):
    """Private base providing the reporting helpers shared by Job and JobGroup."""

    def description(self):
        """
        Return all data attributes as one string of "name:value " pairs,
        sorted by attribute name (each pair is followed by a space).
        """
        return "".join(
            "{a1}:{a2} ".format(a1=key, a2=value)
            for key, value in sorted(vars(self).items())
        )

    def printout(self):
        """Print a dictionary of all data attributes."""
        printHR(vars(self))


class Job(_Describable):
    """
    A single unit of work for execute().

    Data attributes:
    workFunction                 -- callable run in a pool worker
    workFunctionKeywordArguments -- dict of keyword arguments for workFunction
    workFunctionTimeout          -- seconds to wait per attempt when polling
                                    for the result
    naturalLanguageString        -- free-form label
    classInstance                -- always the class name; the constructor
                                    argument of the same name is ignored
    resultGetter                 -- the AsyncResult stored by execute()
    result                       -- the value returned by workFunction
    """

    def __init__(
        self,
        workFunction=workFunctionTest,
        workFunctionKeywordArguments=None,
        workFunctionTimeout=1,
        naturalLanguageString=None,
        classInstance=None,
        resultGetter=None,
        result=None
    ):
        # Build the default dictionary per instance; a dict literal in the
        # signature would be shared (and mutated) across all jobs.
        if workFunctionKeywordArguments is None:
            workFunctionKeywordArguments = {'testString': "hello world"}
        self.workFunction = workFunction
        self.workFunctionKeywordArguments = workFunctionKeywordArguments
        self.workFunctionTimeout = workFunctionTimeout
        self.naturalLanguageString = naturalLanguageString
        self.classInstance = self.__class__.__name__
        self.resultGetter = resultGetter
        self.result = result


class JobGroup(_Describable):
    """
    This class acts as a container for jobs. The data attribute jobs is a
    list of job objects. After execute() has run, the data attribute result
    (also available as results) holds the list of job results.
    """

    def __init__(
        self,
        jobs=None,
        naturalLanguageString="null",
        classInstance=None,
        result=None
    ):
        self.jobs = jobs
        self.naturalLanguageString = naturalLanguageString
        # The classInstance argument is ignored; the class name is recorded.
        self.classInstance = self.__class__.__name__
        self.result = result


def initialise_processes():
    """
    Pool worker initialiser: make the worker process ignore SIGINT
    (presumably so that Ctrl-C is handled by the parent process only).
    """
    signal.signal(signal.SIGINT, signal.SIG_IGN)


def execute(
    jobObject=None,
    numberOfProcesses=multiprocessing.cpu_count()
):
    """
    Run a Job or a JobGroup in a new process pool and return the result(s).

    jobObject         -- a Job or a JobGroup
    numberOfProcesses -- number of pool worker processes

    For a Job, the result of its work function is stored in job.result and
    returned. For a JobGroup, all jobs are submitted first, then their
    results are collected in job order, stored on the group as both result
    and results, and returned as a list. Any other object is reported as
    invalid and None is returned without creating a pool.

    An exception raised by a work function propagates to the caller; the
    pool is terminated and joined in every case.
    """
    # Determine the current function name.
    functionName = str(inspect.stack()[0][3])

    def getResults(job):
        """Poll the job's AsyncResult until it yields, reporting each timeout."""
        # Determine the current function name.
        functionName = str(inspect.stack()[0][3])
        while True:
            try:
                return job.resultGetter.get(job.workFunctionTimeout)
            except multiprocessing.TimeoutError:
                # Not fatal: report the timeout and keep waiting.
                print("{a1}: subprocess timeout for job".format(a1=functionName))

    def submit(job):
        """Apply the job to the pool, saving the AsyncResult on the job."""
        print("{a1}: job submitted to pool: {a2}".format(a1=functionName, a2=job.description()))
        job.resultGetter = pool1.apply_async(
            func=job.workFunction,
            kwds=job.workFunctionKeywordArguments
        )

    # Reject invalid input before any worker processes are started.
    if not isinstance(jobObject, (Job, JobGroup)):
        print("{a1}: invalid job object {a2}".format(a1=functionName, a2=jobObject))
        return None

    # Create a process pool.
    pool1 = multiprocessing.Pool(numberOfProcesses, initialise_processes)
    try:
        print("{a1}: pool {a2} of {a3} processes created".format(a1=functionName, a2=str(pool1), a3=str(numberOfProcesses)))
        # Unpack the input job object and submit it to the process pool.
        print("{a1}: unpacking and applying job object {a2} to pool...".format(a1=functionName, a2=jobObject))
        if isinstance(jobObject, Job):
            job = jobObject
            submit(job)
            # Acquire the job result with respect to the specified job
            # timeout and apply it to the job data attribute result.
            print("{a1}: getting results for job...".format(a1=functionName))
            job.result = getResults(job)
            print("{a1}: job completed: {a2}".format(a1=functionName, a2=job.description()))
            print("{a1}: job result: {a2}".format(a1=functionName, a2=job.result))
            return job.result

        # Job group: submit every job first so that they run concurrently.
        jobs = jobObject.jobs or []
        for job in jobs:
            submit(job)
        # Collect the results in the order of the jobs in the group.
        results = []
        for job in jobs:
            print("{a1}: getting results for job...".format(a1=functionName))
            job.result = getResults(job)
            print("{a1}: job completed: {a2}".format(a1=functionName, a2=job.description()))
            results.append(job.result)
        # ``result`` is the attribute declared by JobGroup; ``results`` is
        # kept as well for callers that already read it.
        jobObject.result = results
        jobObject.results = results
        print("{a1}: job group results: {a2}".format(a1=functionName, a2=jobObject.results))
        return jobObject.results
    finally:
        # Always release the worker processes, also on errors.
        pool1.terminate()
        pool1.join()


def main():
    """Run the interactive demonstration of jobs, job groups and timeouts."""
    print('-'*80)
    print("MULTIPROCESSING SYSTEM DEMONSTRATION\n")

    # Create a job.
    print("# creating a job...\n")
    job1 = Job(
        workFunction=workFunctionTest,
        workFunctionKeywordArguments={'testString': "hello world"},
        workFunctionTimeout=4
    )
    print("- printout of new job object:")
    job1.printout()
    print("\n- printout of new job object in logging format:")
    print(job1.description())

    # Create another job.
    print("\n# creating another job...\n")
    job2 = Job(
        workFunction=multiply,
        workFunctionKeywordArguments={'multiplicand1': 2, 'multiplicand2': 3},
        workFunctionTimeout=6
    )
    print("- printout of new job object:")
    job2.printout()
    print("\n- printout of new job object in logging format:")
    print(job2.description())

    # Create a JobGroup object.
    print("\n# creating a job group (of jobs 1 and 2)...\n")
    jobGroup1 = JobGroup(
        jobs=[job1, job2],
    )
    print("- printout of new job group object:")
    jobGroup1.printout()
    print("\n- printout of new job group object in logging format:")
    print(jobGroup1.description())

    # Submit the job group.
    print("\nready to submit job group")
    _readLine("\nPress Enter to continue...\n")
    execute(jobGroup1)
    _readLine("\nNote the results printed above. Press Enter to continue the demonstration.\n")

    # Demonstrate timeout.
    print("\n # creating a new job in order to demonstrate timeout functionality...\n")
    job3 = Job(
        workFunction=workFunctionTest,
        workFunctionKeywordArguments={'testString': "hello world"},
        workFunctionTimeout=1
    )
    print("- printout of new job object:")
    job3.printout()
    print("\n- printout of new job object in logging format:")
    print(job3.description())
    print("\nNote the timeout specification of only 1 second.")

    # Submit the job.
    print("\nready to submit job")
    _readLine("\nPress Enter to continue...\n")
    execute(job3)
    _readLine("\nNote the recognition of timeouts printed above. This concludes the demonstration.")
    print('-'*80)


if __name__ == '__main__':
    main()

1条回答

网友

1楼 · 发布于 2024-04-26 03:45:35

没有看到实际的代码，我只能笼统地回答。但有两个普遍的解决办法。

首先，不要使用callback并忽略AsyncResults，而是将它们存储在某种集合中。然后你就可以按需要使用这个集合了。例如，如果希望能够以某个函数作为键来查找该函数的结果，只需像下面这样创建一个以函数为键的dict：

def in_parallel(funcs):
    """
    Call every function in funcs (with no arguments) in a process pool and
    return a dict mapping each function to the value it returned.
    """
    pool = mp.Pool()
    # Keep the AsyncResult of each submission next to the function it is for.
    pending = [(func, pool.apply_async(func)) for func in funcs]
    pool.close()
    pool.join()
    # All work has finished after join(), so get() returns without waiting.
    return dict((func, handle.get()) for func, handle in pending)

或者，可以更改回调函数以按键将结果存储在集合中。例如：

def in_parallel(funcs):
    """
    Call every function in funcs (with no arguments) in a process pool and
    return a dict mapping each function to the value it returned; the dict
    is filled in by per-function callbacks.
    """
    collected = {}

    def make_recorder(key):
        # A factory gives every callback its own key, so the callbacks do
        # not all end up sharing the loop variable's final value.
        def record(value):
            collected[key] = value
        return record

    pool = mp.Pool()
    for func in funcs:
        pool.apply_async(func, callback=make_recorder(func))
    pool.close()
    pool.join()
    return collected

我用函数本身作为键。如果你想改用索引作为键，也同样简单。你手头的任何值都可以用作键。

同时，您链接的示例实际上只是对一堆参数调用同一个函数，等待它们全部完成，并以任意顺序将结果保存在某个iterable中。这正是imap_unordered所做的，但要简单得多。您可以用以下内容替换链接代码中的所有复杂内容：

# Call foo_pool once for each value in range(10) using a worker pool and
# gather the return values into a list; imap_unordered yields results in
# arbitrary (completion) order, not in input order.
# NOTE(review): mp is presumably "import multiprocessing as mp" and foo_pool
# is the worker function from the linked example -- neither is defined here.
pool = mp.Pool()
results = list(pool.imap_unordered(foo_pool, range(10)))
# No more tasks will be submitted; wait for the worker processes to exit.
pool.close()
pool.join()

然后，如果希望结果按原始顺序而不是任意顺序排列，可以切换到imap或map。所以：

# Same as the imap_unordered variant, but map returns the results as a list
# in the order of the inputs.
# NOTE(review): mp is presumably "import multiprocessing as mp" and foo_pool
# is the worker function from the linked example -- neither is defined here.
pool = mp.Pool()
results = pool.map(foo_pool, range(10))
# No more tasks will be submitted; wait for the worker processes to exit.
pool.close()
pool.join()

如果你需要的东西与此类似，但过于复杂而无法套用map范式，concurrent.futures可能会让你的生活比multiprocessing更轻松。如果您使用的是Python 2.x，则必须安装其backport。但它能做到一些用AsyncResults或callbacks（或map）很难做到的事情，比如把一大堆future组合成一个大的future。请参阅链接文档中的示例。

最后一句话：

The important points to emphasise are that I can not modify the existing functions…

如果不能修改函数，则始终可以将其包装。例如，假设我有一个函数，它返回一个数字的平方，但是我正在尝试异步构建一个dict映射数字到它们的平方，所以我也需要将原始数字作为结果的一部分。很简单：

def number_and_square(x):
    """Return the pair (x, square(x)) so the input travels with its result."""
    squared = square(x)
    return (x, squared)

现在，我只需要apply_async(number_and_square)而不是square，就可以得到我想要的结果。

在上面的例子中，我没有这样做，因为在第一种情况下，我是在调用端把键存入集合的；在第二种情况下，我把它绑定到了回调函数中。但是把它绑定到函数的包装器中与这两种方法一样简单，并且在这两种方法都不适用时正好合适。

相关问题更多 >

编程相关推荐

热门问题

热门文章