Multiprocessing.Queue deadlocking during Process spawning


Question


Let me start by thanking you for taking the time to read this. First off, I would like to share my code, see below. It is around 200 lines, but most of them are property definitions.

Some context: I am just trying to get a better understanding of multiprocessing, so I made a small project to run multiple processes that all share a single input queue and a single output queue. It works, but sometimes it deadlocks right after calling the scheduler.start() method.

I am using Python 3.8 on macOS Catalina.

# coding=utf-8
from abc import abstractmethod, ABC
from multiprocessing import Process, Queue
from multiprocessing.queues import Queue as QueueType  # concrete class; multiprocessing.Queue is a factory function and cannot be used with isinstance()
from time import sleep
from typing import Optional, List, Dict, Union, Any
from uuid import uuid4


class AbstractTask(ABC):
    def __init__(self, input_queue: Optional[Queue] = None, output_queue: Optional[Queue] = None, /):
        self._input_queue = input_queue
        self._output_queue = output_queue

    @property
    def input_queue(self):
        return self._input_queue

    @input_queue.setter
    def input_queue(self, value: Queue):
        if self._input_queue is None:
            self._input_queue = value

    @property
    def output_queue(self):
        return self._output_queue

    @output_queue.setter
    def output_queue(self, value: Queue):
        if self._output_queue is None:
            self._output_queue = value

    @abstractmethod
    def run(self):
        pass


class SimpleTask(AbstractTask):
    def __init__(self, input_queue: Optional[Queue] = None, output_queue: Optional[Queue] = None, /):
        super().__init__(input_queue, output_queue)

    def run(self):
        while True:
            event = self.input_queue.get()
            print(f"Process ({id(self)} got a new event: {event}")
            self.output_queue.put(event)


class Calculation(Process):
    def __init__(self, _input: Queue, _output: Queue, _task: AbstractTask = None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._input = _input
        self._output = _output
        self._task = _task

        # debugging:
        print(f"{self.pid=}")
        print(f"input_queue id: {id(self._input)}")
        print(f"output_queue id: {id(self._output)}")

    def run(self) -> None:
        if self._task is not None:
            self._task.input_queue = self._input
            self._task.output_queue = self._output
            self._task.run()


class AbstractWorker(ABC):
    def __init__(self):
        """
        Abstract implementation for the Worker class.

        """
        # attributes
        self._identifier = int(uuid4())
        self._status = None
        self.error: bool = False
        self.process: Optional[Process] = None

        # initialisation
        self.initialize()

    @property
    def identifier(self):
        return self._identifier

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, value):
        if value is not None:
            self._status = value  # assign the backing attribute, not the property, to avoid infinite recursion

    @abstractmethod
    def initialize(self):
        pass

    def get_identifier(self):
        return self.identifier

    def get_status(self):
        return self.status

    def start(self, _input: Queue, _output: Queue, _task: AbstractTask):
        # need: a sharable input queue
        # need: a queue to put results
        self.process = Calculation(_input, _output, _task, daemon=True)
        self.process.start()


class Worker(AbstractWorker):
    def __init__(self):
        super().__init__()

    def initialize(self):
        print(f"Created new Worker with UUID: {self.identifier}")


class Scheduler:
    def __init__(self, shared_input: Queue, shared_output: Queue):

        # Attributes
        self._workers: Dict[int, AbstractWorker] = dict()
        self._tasks: List[AbstractTask] = []
        self._shared_input: Queue = shared_input
        self._shared_output: Queue = shared_output

    @property
    def shared_input(self):
        if self._shared_input is None:
            raise ValueError("attribute 'shared_input' is None, never given.")
        return self._shared_input

    @shared_input.setter
    def shared_input(self, value: Queue):
        if isinstance(value, QueueType):
            self._shared_input = value
        else:
            raise ValueError("Attribute 'shared_input' must be of type 'multiprocessing.Queue'.")

    @property
    def shared_output(self):
        if self._shared_output is None:
            raise ValueError("attribute 'shared_output' is None, never given.")
        return self._shared_output

    @shared_output.setter
    def shared_output(self, value: Queue):
        if isinstance(value, QueueType):
            self._shared_output = value
        else:
            raise ValueError("Attribute 'shared_output' must be of type 'multiprocessing.Queue'.")

    @property
    def tasks(self):
        return self._tasks

    @tasks.setter
    def tasks(self, new_task_list: List[AbstractTask]):
        if all([isinstance(task, AbstractTask) for task in new_task_list]):
            self._tasks = new_task_list
        else:
            raise ValueError("Every item in provided task list must be based of 'AbstractTask'.")

    @property
    def workers(self):
        return self._workers

    @workers.setter
    def workers(self, new_worker_list: Dict[int, AbstractWorker]):
        if all([isinstance(worker, AbstractWorker) for worker in new_worker_list.values()]):
            def _check(_worker: AbstractWorker):
                if _worker.status == 'WORKING':
                    return True

            if any(_check(current_worker) for current_worker in self.workers.values()):
                raise RuntimeError("Trying to set new workers while current workers are still running!")

            self._workers = new_worker_list

    def subscribe(self, worker: AbstractWorker):
        self.workers[worker.identifier] = worker

    def status(self, identifier: Optional[int] = None) -> Union[str, Dict[int, str]]:
        if identifier is None:
            return {identifier: worker.status for (identifier, worker) in self.workers.items()}
        return self.workers[identifier].status

    def start(self, task: AbstractTask):
        for worker in self.workers.values():
            worker.start(self.shared_input, self.shared_output, task)

    def register(self, event: Any):
        if self.shared_input:
            self.shared_input.put(event, block=False)


def main():
    _input_queue = Queue()
    _output_queue = Queue()
    scheduler = Scheduler(_input_queue, _output_queue)

    w1 = Worker()
    w2 = Worker()
    w3 = Worker()
    w4 = Worker()

    _workers = [w1, w2, w3, w4]

    for _worker in _workers:
        scheduler.subscribe(_worker)

    task = SimpleTask()
    scheduler.start(task)
    sleep(0.1)  # Prevents deadlocking; something to do with the threading lock and the bootstrapping time of the processes

    for i in range(10000):
        scheduler.register(str(i))

    while not scheduler.shared_output.empty():
        result = scheduler.shared_output.get()
        print(f"Got a new result on main process: {result}")


if __name__ == '__main__':
    main()

I have 'solved' the issue with a simple sleep of less than a second, but as this solution is inelegant and, in my opinion, ugly, I went looking for a better one. I have not found it. I think the error has something to do with the threads the Queue objects use internally; there seems to be a race condition, but I have no way of checking (that I am aware of).
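
To make concrete what kind of "more elegant" solution I mean, here is a rough, stripped-down sketch of the direction I have been experimenting with (worker_loop and ready_event are names made up for this snippet and are not part of the code above): each child sets a multiprocessing.Event once it is actually inside its get() loop, and the parent waits on those events instead of sleeping for a fixed amount of time. I am not sure this is the right approach, which is partly why I am asking.

# Sketch only; not the project code from above.
from multiprocessing import Event, Process, Queue


def worker_loop(input_queue: Queue, output_queue: Queue, ready_event: Event):
    ready_event.set()              # signal the parent that this child is up and consuming
    while True:
        item = input_queue.get()   # blocks until an item is available
        output_queue.put(item)


def main():
    input_queue, output_queue = Queue(), Queue()
    ready_events = [Event() for _ in range(4)]
    workers = [
        Process(target=worker_loop, args=(input_queue, output_queue, ev), daemon=True)
        for ev in ready_events
    ]
    for worker in workers:
        worker.start()
    for ev in ready_events:
        ev.wait()                  # replaces sleep(0.1): block until every child is live

    for i in range(10000):
        input_queue.put(str(i))

    # Drain by counting items instead of relying on .empty(), which can race with the
    # feeder threads of the child processes.
    for _ in range(10000):
        print(f"Got a new result on main process: {output_queue.get()}")


if __name__ == '__main__':
    main()

If this pattern is sound, I assume I could pass an Event into Calculation through its constructor and set it at the top of run(), but I have not verified that it removes the deadlock in every run.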

For those interested, the KeyboardInterrupt traceback I get when running this is below:

Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/usr/local/Cellar/python@3.8/3.8.1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/util.py", line 277, in _run_finalizers
    finalizer()
  File "/usr/local/Cellar/python@3.8/3.8.1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/util.py", line 201, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/usr/local/Cellar/python@3.8/3.8.1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/queues.py", line 195, in _finalize_join
    thread.join()
  File "/usr/local/Cellar/python@3.8/3.8.1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 1011, in join
    self._wait_for_tstate_lock()
  File "/usr/local/Cellar/python@3.8/3.8.1/Frameworks/Python.framework/Versions/3.8/lib/python3.8/threading.py", line 1027, in _wait_for_tstate_lock
    elif lock.acquire(block, timeout):
KeyboardInterrupt

EDIT 1: It seems to be a problem where forking the child processes puts a lock on the Queue objects; the only remaining question is why that locking is so relatively slow.
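
A small standalone snippet like the one below (again just a sketch of mine; report_start_time is a made-up name, not part of the project) could measure how long a child takes between Process.start() in the parent and the first line of user code in the child, which should show whether the 0.1 second sleep is simply covering that bootstrap time. As far as I understand, Python 3.8 on macOS defaults to the spawn start method rather than fork, and spawn re-imports the module in the child, which would make this gap larger.

# Standalone timing sketch; not part of the project code above.
import time
from multiprocessing import Process, Queue


def report_start_time(q: Queue):
    q.put(time.time())  # first user-level statement executed in the child


if __name__ == '__main__':
    q = Queue()
    t0 = time.time()
    p = Process(target=report_start_time, args=(q,))
    p.start()
    print(f"child took {q.get() - t0:.3f} s to reach user code")
    p.join()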

Source: https://stackoverflow.com/questions/61212290/multiprocessing-queue-deadlocking-during-process-spawning
