如何在Python中给函数添加超时

33 投票
5 回答
45295 浏览
提问于 2025-04-15 18:52

过去有很多尝试想在Python中添加超时功能,也就是说,当设定的时间到了,正在等待的代码可以继续执行。可惜的是,以前的方法要么让正在运行的函数继续运行并消耗资源,要么使用特定平台的方法强行结束这个函数。这个wiki的目的是为这个很多程序员在不同项目中遇到的问题提供一个跨平台的解决方案。

#! /usr/bin/env python
"""Provide way to add timeout specifications to arbitrary functions.

There are many ways to add a timeout to a function, but no solution
is both cross-platform and capable of terminating the procedure. This
module use the multiprocessing module to solve both of those problems."""

################################################################################

__author__ = 'Stephen "Zero" Chappell <Noctis.Skytower@gmail.com>'
__date__ = '11 February 2010'
__version__ = '$Revision: 3 $'

################################################################################

import inspect
import sys
import time
import multiprocessing

################################################################################

def add_timeout(function, limit=60):
    """Add a timeout parameter to a function and return it.

    It is illegal to pass anything other than a function as the first
    parameter. If the limit is not given, it gets a default value equal
    to one minute. The function is wrapped and returned to the caller."""
    assert inspect.isfunction(function)
    if limit <= 0:
        raise ValueError()
    return _Timeout(function, limit)

class NotReadyError(Exception): pass

################################################################################

def _target(queue, function, *args, **kwargs):
    """Run a function with arguments and return output via a queue.

    This is a helper function for the Process created in _Timeout. It runs
    the function with positional arguments and keyword arguments and then
    returns the function's output by way of a queue. If an exception gets
    raised, it is returned to _Timeout to be raised by the value property."""
    try:
        queue.put((True, function(*args, **kwargs)))
    except:
        queue.put((False, sys.exc_info()[1]))

class _Timeout:

    """Wrap a function and add a timeout (limit) attribute to it.

    Instances of this class are automatically generated by the add_timeout
    function defined above. Wrapping a function allows asynchronous calls
    to be made and termination of execution after a timeout has passed."""

    def __init__(self, function, limit):
        """Initialize instance in preparation for being called."""
        self.__limit = limit
        self.__function = function
        self.__timeout = time.clock()
        self.__process = multiprocessing.Process()
        self.__queue = multiprocessing.Queue()

    def __call__(self, *args, **kwargs):
        """Execute the embedded function object asynchronously.

        The function given to the constructor is transparently called and
        requires that "ready" be intermittently polled. If and when it is
        True, the "value" property may then be checked for returned data."""
        self.cancel()
        self.__queue = multiprocessing.Queue(1)
        args = (self.__queue, self.__function) + args
        self.__process = multiprocessing.Process(target=_target,
                                                 args=args,
                                                 kwargs=kwargs)
        self.__process.daemon = True
        self.__process.start()
        self.__timeout = self.__limit + time.clock()

    def cancel(self):
        """Terminate any possible execution of the embedded function."""
        if self.__process.is_alive():
            self.__process.terminate()

    @property
    def ready(self):
        """Read-only property indicating status of "value" property."""
        if self.__queue.full():
            return True
        elif not self.__queue.empty():
            return True
        elif self.__timeout < time.clock():
            self.cancel()
        else:
            return False

    @property
    def value(self):
        """Read-only property containing data returned from function."""
        if self.ready is True:
            flag, load = self.__queue.get()
            if flag:
                return load
            raise load
        raise NotReadyError()

    def __get_limit(self):
        return self.__limit

    def __set_limit(self, value):
        if value <= 0:
            raise ValueError()
        self.__limit = value

    limit = property(__get_limit, __set_limit,
                     doc="Property for controlling the value of the timeout.")

编辑:这段代码是为Python 3.x写的,并不是为了作为装饰器来修饰类的方法。multiprocessing模块并不是为了在进程间修改类实例而设计的。

5 个回答

6

这是获取Jerub提到的装饰器语法的方法

def timeout(limit=None):
    if limit is None:
        limit = DEFAULT_TIMEOUT
    if limit <= 0:
        raise TimeoutError() # why not ValueError here?
    def wrap(function):
        return _Timeout(function,limit)
    return wrap

@timeout(15)
def mymethod(): pass
13

你代码的主要问题是过度使用了双下划线来避免命名冲突,但这个类其实并不打算被继承。

一般来说,self.__foo 这种写法就像是代码中的一个警告,应该加上注释,比如 # 这是一个混入类,我们不希望任意子类产生命名冲突

此外,这个方法的客户端 API 看起来是这样的:

def mymethod(): pass

mymethod = add_timeout(mymethod, 15)

# start the processing    
timeout_obj = mymethod()
try:
    # access the property, which is really a function call
    ret = timeout_obj.value
except TimeoutError:
    # handle a timeout here
    ret = None

这并不是很符合 Python 的风格,更好的客户端 API 应该是:

@timeout(15)
def mymethod(): pass

try:
    my_method()
except TimeoutError:
    pass

你在类中使用了 @property,但它其实是一个会改变状态的访问器,这样做并不好。比如,当 .value 被访问两次时,会发生什么?看起来会出错,因为 queue.get() 会返回无效数据,因为队列已经是空的了。

完全去掉 @property。在这种情况下不要使用它,它不适合你的需求。让 call 在被调用时执行,并返回值或者直接抛出异常。如果你真的需要稍后访问值,可以把它做成一个方法,比如 .get() 或 .value()。

关于 _target 的代码应该稍微重写一下:

def _target(queue, function, *args, **kwargs):
    try:
        queue.put((True, function(*args, **kwargs)))
    except:
        queue.put((False, exc_info())) # get *all* the exec info, don't do exc_info[1]

# then later:
    raise exc_info[0], exc_info[1], exc_info[2]

这样堆栈跟踪信息就能被正确保留,程序员也能看到。

我觉得你在写一个有用的库方面已经迈出了不错的一步,我喜欢你使用处理模块来实现目标。

3

这个问题是在9年前提出来的,之后Python发生了不少变化,我的经验也增加了很多。在查看了标准库中的其他API后,我想要部分复制其中一个,所以写了这个模块,目的是和问题中提到的那个模块有点相似。

asynchronous.py

#! /usr/bin/env python3
import _thread
import abc as _abc
import collections as _collections
import enum as _enum
import math as _math
import multiprocessing as _multiprocessing
import operator as _operator
import queue as _queue
import signal as _signal
import sys as _sys
import time as _time

__all__ = (
    'Executor',
    'get_timeout',
    'set_timeout',
    'submit',
    'map_',
    'shutdown'
)


class _Base(metaclass=_abc.ABCMeta):
    __slots__ = (
        '__timeout',
    )

    @_abc.abstractmethod
    def __init__(self, timeout):
        self.timeout = _math.inf if timeout is None else timeout

    def get_timeout(self):
        return self.__timeout

    def set_timeout(self, value):
        if not isinstance(value, (float, int)):
            raise TypeError('value must be of type float or int')
        if value <= 0:
            raise ValueError('value must be greater than zero')
        self.__timeout = value

    timeout = property(get_timeout, set_timeout)


def _run_and_catch(fn, args, kwargs):
    # noinspection PyPep8,PyBroadException
    try:
        return False, fn(*args, **kwargs)
    except:
        return True, _sys.exc_info()[1]


def _run(fn, args, kwargs, queue):
    queue.put_nowait(_run_and_catch(fn, args, kwargs))


class _State(_enum.IntEnum):
    PENDING = _enum.auto()
    RUNNING = _enum.auto()
    CANCELLED = _enum.auto()
    FINISHED = _enum.auto()
    ERROR = _enum.auto()


def _run_and_catch_loop(iterable, *args, **kwargs):
    exception = None
    for fn in iterable:
        error, value = _run_and_catch(fn, args, kwargs)
        if error:
            exception = value
    if exception:
        raise exception


class _Future(_Base):
    __slots__ = (
        '__queue',
        '__process',
        '__start_time',
        '__callbacks',
        '__result',
        '__mutex'
    )

    def __init__(self, timeout, fn, args, kwargs):
        super().__init__(timeout)
        self.__queue = _multiprocessing.Queue(1)
        self.__process = _multiprocessing.Process(
            target=_run,
            args=(fn, args, kwargs, self.__queue),
            daemon=True
        )
        self.__start_time = _math.inf
        self.__callbacks = _collections.deque()
        self.__result = True, TimeoutError()
        self.__mutex = _thread.allocate_lock()

    @property
    def __state(self):
        pid, exitcode = self.__process.pid, self.__process.exitcode
        return (_State.PENDING if pid is None else
                _State.RUNNING if exitcode is None else
                _State.CANCELLED if exitcode == -_signal.SIGTERM else
                _State.FINISHED if exitcode == 0 else
                _State.ERROR)

    def __repr__(self):
        root = f'{type(self).__name__} at {id(self)} state={self.__state.name}'
        if self.__state < _State.CANCELLED:
            return f'<{root}>'
        error, value = self.__result
        suffix = f'{"raised" if error else "returned"} {type(value).__name__}'
        return f'<{root} {suffix}>'

    def __consume_callbacks(self):
        while self.__callbacks:
            yield self.__callbacks.popleft()

    def __invoke_callbacks(self):
        self.__process.join()
        _run_and_catch_loop(self.__consume_callbacks(), self)

    def cancel(self):
        self.__process.terminate()
        self.__invoke_callbacks()

    def __auto_cancel(self):
        elapsed_time = _time.perf_counter() - self.__start_time
        if elapsed_time > self.timeout:
            self.cancel()
        return elapsed_time

    def cancelled(self):
        self.__auto_cancel()
        return self.__state is _State.CANCELLED

    def running(self):
        self.__auto_cancel()
        return self.__state is _State.RUNNING

    def done(self):
        self.__auto_cancel()
        return self.__state > _State.RUNNING

    def __handle_result(self, error, value):
        self.__result = error, value
        self.__invoke_callbacks()

    def __ensure_termination(self):
        with self.__mutex:
            elapsed_time = self.__auto_cancel()
            if not self.__queue.empty():
                self.__handle_result(*self.__queue.get_nowait())
            elif self.__state < _State.CANCELLED:
                remaining_time = self.timeout - elapsed_time
                if remaining_time == _math.inf:
                    remaining_time = None
                try:
                    result = self.__queue.get(True, remaining_time)
                except _queue.Empty:
                    self.cancel()
                else:
                    self.__handle_result(*result)

    def result(self):
        self.__ensure_termination()
        error, value = self.__result
        if error:
            raise value
        return value

    def exception(self):
        self.__ensure_termination()
        error, value = self.__result
        if error:
            return value

    def add_done_callback(self, fn):
        if self.done():
            fn(self)
        else:
            self.__callbacks.append(fn)

    def _set_running_or_notify_cancel(self):
        if self.__state is _State.PENDING:
            self.__process.start()
            self.__start_time = _time.perf_counter()
        else:
            self.cancel()


class Executor(_Base):
    __slots__ = (
        '__futures',
    )

    def __init__(self, timeout=None):
        super().__init__(timeout)
        self.__futures = set()

    def submit(self, fn, *args, **kwargs):
        future = _Future(self.timeout, fn, args, kwargs)
        self.__futures.add(future)
        future.add_done_callback(self.__futures.remove)
        # noinspection PyProtectedMember
        future._set_running_or_notify_cancel()
        return future

    @staticmethod
    def __cancel_futures(iterable):
        _run_and_catch_loop(map(_operator.attrgetter('cancel'), iterable))

    def map(self, fn, *iterables):
        futures = tuple(self.submit(fn, *args) for args in zip(*iterables))

        def result_iterator():
            future_iterator = iter(futures)
            try:
                for future in future_iterator:
                    yield future.result()
            finally:
                self.__cancel_futures(future_iterator)

        return result_iterator()

    def shutdown(self):
        self.__cancel_futures(frozenset(self.__futures))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.shutdown()
        return False


_executor = Executor()
get_timeout = _executor.get_timeout
set_timeout = _executor.set_timeout
submit = _executor.submit
map_ = _executor.map
shutdown = _executor.shutdown
del _executor

撰写回答