Throttling Async Functions in Python Asyncio

前端 未结 2 738
一生所求
一生所求 2020-12-04 11:53

I have a list of awaitables that I want to pass to the asyncio.AbstractEventLoop but I need to throttle the requests to a third party API.

相关标签:
2条回答
  • 2020-12-04 12:10

    You can do this by implementing the leaky bucket algorithm:

    import asyncio
    import contextlib
    import collections
    import time
    
    from types import TracebackType
    from typing import Dict, Optional, Type
    
    try:  # Python 3.7+
        base = contextlib.AbstractAsyncContextManager
        _current_task = asyncio.current_task
    except AttributeError:  # older Python: neither name exists yet
        base = object  # type: ignore
        _current_task = asyncio.Task.current_task  # type: ignore

    class AsyncLeakyBucket(base):
        """A leaky bucket rate limiter.

        The bucket holds at most ``max_rate`` units. Every acquisition adds
        to the level, and the level drains continuously at
        ``max_rate / time_period`` units per second. Acquiring blocks while
        adding the requested amount would overflow the bucket.

        time_period is measured in seconds; the default is 60.

        """
        def __init__(
            self,
            max_rate: float,
            time_period: float = 60,
            loop: Optional[asyncio.AbstractEventLoop] = None
        ) -> None:
            """Create a bucket.

            max_rate is the bucket capacity (and so the maximum burst size),
            time_period the number of seconds in which a full bucket drains,
            loop an optional event loop used to create waiter futures; when
            omitted, the loop is looked up at acquire() time.

            """
            self._loop = loop
            self._max_level = max_rate
            self._rate_per_sec = max_rate / time_period
            self._level = 0.0
            self._last_check = 0.0
            # queue of waiting futures to signal capacity to
            self._waiters: Dict[asyncio.Task, asyncio.Future] = collections.OrderedDict()

        def _leak(self) -> None:
            """Drip out capacity from the bucket."""
            # Use a monotonic clock, read once: wall-clock adjustments must
            # not produce a negative or inflated elapsed time.
            now = time.monotonic()
            if self._level:
                # drip out enough level for the elapsed time since
                # we last checked
                elapsed = now - self._last_check
                decrement = elapsed * self._rate_per_sec
                self._level = max(self._level - decrement, 0)
            self._last_check = now

        def has_capacity(self, amount: float = 1) -> bool:
            """Check if there is enough space remaining in the bucket.

            As a side effect the bucket is drained for the elapsed time, and
            the first still-pending waiter is signalled when there is room.

            """
            self._leak()
            requested = self._level + amount
            # if there are tasks waiting for capacity, signal to the first
            # that there may be some now (they won't wake up until this task
            # yields with an await)
            if requested < self._max_level:
                for fut in self._waiters.values():
                    if not fut.done():
                        fut.set_result(True)
                        break
            return requested <= self._max_level

        async def acquire(self, amount: float = 1) -> None:
            """Acquire space in the bucket.

            If the bucket is full, block until there is space.

            Raises ValueError when amount exceeds the bucket capacity.

            """
            if amount > self._max_level:
                raise ValueError("Can't acquire more than the bucket capacity")

            loop = self._loop or asyncio.get_event_loop()
            task = _current_task(loop)
            assert task is not None
            try:
                while not self.has_capacity(amount):
                    # wait for the next drip to have left the bucket
                    # add a future to the _waiters map to be notified
                    # 'early' if capacity has come up
                    fut = loop.create_future()
                    self._waiters[task] = fut
                    try:
                        # No loop= argument: it was removed from wait_for()
                        # in Python 3.10. The shield keeps the timeout from
                        # cancelling fut itself.
                        await asyncio.wait_for(
                            asyncio.shield(fut),
                            1 / self._rate_per_sec * amount,
                        )
                    except asyncio.TimeoutError:
                        pass
                    finally:
                        # Also runs when this task is cancelled, so that
                        # has_capacity() never signals an abandoned future.
                        fut.cancel()
            finally:
                # Always drop our entry, even when cancelled mid-wait.
                self._waiters.pop(task, None)

            self._level += amount

            return None

        async def __aenter__(self) -> None:
            """Acquire one unit of capacity on entering the context."""
            await self.acquire()
            return None

        async def __aexit__(
            self,
            exc_type: Optional[Type[BaseException]],
            exc: Optional[BaseException],
            tb: Optional[TracebackType]
        ) -> None:
            """Do nothing on exit; capacity is only returned by leaking."""
            return None
    

    Note that we leak capacity from the bucket opportunistically; there is no need to run a separate async task just to lower the level. Instead, capacity is leaked out when testing for sufficient remaining capacity.

    Note that tasks that wait for capacity are kept in an ordered dictionary, and when there might be capacity to spare again, the first still-waiting task is woken up early.

    You can use this as a context manager; trying to acquire the bucket when it is full blocks until enough capacity has been freed again:

    bucket = AsyncLeakyBucket(100)
    
    # ...
    
    async with bucket:
        # only reached once the bucket is no longer full
    

    or you can call acquire() directly:

    await bucket.acquire()  # blocks until there is space in the bucket
    

    or you can simply test if there is space first:

    if bucket.has_capacity():
        # reject a request due to rate limiting
    

    Note that you can count some requests as 'heavier' or 'lighter' by increasing or decreasing the amount you 'drip' into the bucket:

    await bucket.acquire(10)
    if bucket.has_capacity(0.5):
    

    Do be careful with this though; when mixing large and small drips, small drips tend to get run before large drips when at or close to the maximum rate, because there is a greater likelihood that there is enough free capacity for a smaller drip before there is space for a larger one.

    Demo:

    >>> import asyncio, time
    >>> bucket = AsyncLeakyBucket(5, 10)
    >>> async def task(id):
    ...     await asyncio.sleep(id * 0.01)
    ...     async with bucket:
    ...         print(f'{id:>2d}: Drip! {time.time() - ref:>5.2f}')
    ...
    >>> ref = time.time()
    >>> tasks = [task(i) for i in range(15)]
    >>> result = asyncio.run(asyncio.wait(tasks))
     0: Drip!  0.00
     1: Drip!  0.02
     2: Drip!  0.02
     3: Drip!  0.03
     4: Drip!  0.04
     5: Drip!  2.05
     6: Drip!  4.06
     7: Drip!  6.06
     8: Drip!  8.06
     9: Drip! 10.07
    10: Drip! 12.07
    11: Drip! 14.08
    12: Drip! 16.08
    13: Drip! 18.08
    14: Drip! 20.09
    

    The bucket is filled up quickly at the start in a burst, causing the rest of the tasks to be spread out more evenly; every 2 seconds enough capacity is freed for another task to be handled.

    The maximum burst size is equal to the maximum rate value, in the above demo that was set to 5. If you do not want to permit bursts, set the maximum rate to 1, and the time period to the minimum time between drips:

    >>> bucket = AsyncLeakyBucket(1, 1.5)  # no bursts, drip every 1.5 seconds
    >>> async def task():
    ...     async with bucket:
    ...         print(f'Drip! {time.time() - ref:>5.2f}')
    ...
    >>> ref = time.time()
    >>> tasks = [task() for _ in range(5)]
    >>> result = asyncio.run(asyncio.wait(tasks))
    Drip!  0.00
    Drip!  1.50
    Drip!  3.01
    Drip!  4.51
    Drip!  6.02
    

    I've gotten round to packaging this up as a Python project: https://github.com/mjpieters/aiolimiter

    0 讨论(0)
  • 2020-12-04 12:27

    Another solution - using bounded semaphores - by a coworker, mentor, and friend, is the following:

    import asyncio
    
    
    class AsyncLeakyBucket(object):
        """Rate limiter backed by a bounded semaphore that is refilled over time.

        Up to ``max_tasks`` acquisitions succeed immediately; after that a
        background task releases one slot every ``time_period / max_tasks``
        seconds. Use it as ``async with bucket: ...``.

        The semaphore and the background task are created on first use rather
        than in ``__init__``, so a bucket constructed before the event loop is
        running (for example at module level, before ``asyncio.run()``) still
        refills on the loop that actually uses it.
        """

        def __init__(
            self,
            max_tasks: int,
            time_period: float = 60,
            loop: "asyncio.AbstractEventLoop | None" = None,
        ) -> None:
            """Configure the bucket.

            max_tasks is the burst size and the number of slots released per
            time_period (seconds). loop, when given, is the event loop the
            background leak task is scheduled on; otherwise the loop in use
            at the first acquisition is taken.
            """
            self._delay_time = time_period / max_tasks
            self._max_tasks = max_tasks
            self._loop = loop
            self._sem = None
            # Keep a reference to the leak task so it can be cancelled via
            # close() and cannot be garbage collected while pending.
            self._leak_task = None

        def _start(self) -> None:
            """Create the semaphore and start the leak task, once."""
            if self._leak_task is None:
                loop = self._loop or asyncio.get_event_loop()
                self._sem = asyncio.BoundedSemaphore(self._max_tasks)
                self._leak_task = loop.create_task(self._leak_sem())

        async def _leak_sem(self):
            """
            Background task that leaks semaphore releases based on the desired rate of tasks per time_period
            """
            while True:
                await asyncio.sleep(self._delay_time)
                try:
                    self._sem.release()
                except ValueError:
                    # The bounded semaphore is already at its maximum, i.e.
                    # the bucket is empty; there is nothing to leak.
                    pass

        def close(self) -> None:
            """Cancel the background leak task.

            After closing, the bucket no longer refills and should not be
            used for further acquisitions.
            """
            if self._leak_task is not None:
                self._leak_task.cancel()

        async def __aenter__(self) -> None:
            """Wait for a free slot, starting the leak task on first use."""
            self._start()
            await self._sem.acquire()

        async def __aexit__(self, exc_type, exc, tb) -> None:
            """Do nothing; slots are only returned by the leak task."""
            pass
    

    Can still be used with the same async with bucket code as in @Martijn's answer

    0 讨论(0)
提交回复
热议问题