Can't pickle &lt;type 'instancemethod'&gt; when using multiprocessing Pool.map()

醉梦人生 · 2020-11-22 00:19

I'm trying to use multiprocessing's Pool.map() function to divide out work simultaneously. When I use the following code, it works fine:

12 Answers

借酒劲吻你 · 2020-11-22 00:55

    Some limitations, though, to Steven Bethard's solution:

    When you register your class method as a function, the destructor of your class is, surprisingly, called every time the method's processing finishes. So if a single instance of your class calls its method n times, members may disappear between two runs and you may get a message like malloc: *** error for object 0x...: pointer being freed was not allocated (e.g. an open member file) or pure virtual method called, terminate called without an active exception (meaning that the lifetime of a member object was shorter than I expected). I ran into this when n was greater than the pool size. Here is a short example:

    from multiprocessing import Pool, cpu_count
    from multiprocessing.pool import ApplyResult
    
    # --------- see Steven Bethard's solution above -------------
    from copy_reg import pickle   # Python 2 only (renamed copyreg in Python 3)
    from types import MethodType
    
    def _pickle_method(method):
        # reduce a bound method to (function name, instance, class)
        func_name = method.im_func.__name__
        obj = method.im_self
        cls = method.im_class
        return _unpickle_method, (func_name, obj, cls)
    
    def _unpickle_method(func_name, obj, cls):
        # walk the MRO until the function is found, then re-bind it to obj
        for cls in cls.mro():
            try:
                func = cls.__dict__[func_name]
            except KeyError:
                pass
            else:
                break
        return func.__get__(obj, cls)
    
    
    class Myclass(object):
    
        def __init__(self, nobj, workers=cpu_count()):
    
            print "Constructor ..."
            # multi-processing
            pool = Pool(processes=workers)
            async_results = [ pool.apply_async(self.process_obj, (i,)) for i in range(nobj) ]
            pool.close()
            # waiting for all results
            map(ApplyResult.wait, async_results)
            lst_results=[r.get() for r in async_results]
            print lst_results
    
        def __del__(self):
            print "... Destructor"
    
        def process_obj(self, index):
            print "object %d" % index
            return "results"
    
    pickle(MethodType, _pickle_method, _unpickle_method)
    Myclass(nobj=8, workers=3)
    # problem !!! the destructor is called nobj times (instead of once)
    

    Output:

    Constructor ...
    object 0
    object 1
    object 2
    ... Destructor
    object 3
    ... Destructor
    object 4
    ... Destructor
    object 5
    ... Destructor
    object 6
    ... Destructor
    object 7
    ... Destructor
    ... Destructor
    ... Destructor
    ['results', 'results', 'results', 'results', 'results', 'results', 'results', 'results']
    ... Destructor
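
    What seems to happen here: every apply_async pickles self (through the registered reducer), each worker task unpickles its own copy of the instance, and each copy's __del__ fires when the worker garbage-collects it. A minimal sketch of that copy semantics, using plain pickle and a made-up Tracked class instead of multiprocessing:

    import pickle

    class Tracked(object):
        def __del__(self):
            print("... Destructor")   # fires once per copy, not once per logical object

    original = Tracked()
    # a pickle round-trip -- what multiprocessing does for every task --
    # creates an independent copy with its own lifetime
    copy = pickle.loads(pickle.dumps(original))
    del copy       # prints "... Destructor" (the worker-side copy dying)
    del original   # prints "... Destructor" again (the original dying)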
    

    The __call__ approach is not really equivalent either, because [None, ...] comes back in the results:

    from multiprocessing import Pool, cpu_count
    from multiprocessing.pool import ApplyResult
    
    class Myclass(object):
    
        def __init__(self, nobj, workers=cpu_count()):
    
            print "Constructor ..."
            # multiprocessing
            pool = Pool(processes=workers)
            async_results = [ pool.apply_async(self, (i,)) for i in range(nobj) ]
            pool.close()
            # waiting for all results
            map(ApplyResult.wait, async_results)
            lst_results=[r.get() for r in async_results]
            print lst_results
    
        def __call__(self, i):
            self.process_obj(i)   # note: no return here, so each task yields None
    
        def __del__(self):
            print "... Destructor"
    
        def process_obj(self, i):
            print "obj %d" % i
            return "result"
    
    Myclass(nobj=8, workers=3)
    # problem !!! the destructor is called nobj times (instead of once),
    # and the results are empty !
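
    The [None, ...] results actually have a simple cause: __call__ calls self.process_obj(i) but does not return its value, so every task implicitly returns None. Returning it fixes the results, though the destructor problem remains; a one-line sketch of the fix:

    def __call__(self, i):
        return self.process_obj(i)   # forward the worker's result back to the pool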
    

    So neither of the two approaches is satisfying...
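
    For what it's worth, in Python 3 bound methods pickle out of the box, so the copy_reg registration is not needed there at all; the instance still gets copied into each worker, though, so __del__ still runs once per copy. A quick Python 3 sketch:

    from multiprocessing import Pool

    class MyClass(object):
        def process_obj(self, index):
            return "result %d" % index

    if __name__ == "__main__":
        obj = MyClass()
        with Pool(processes=3) as pool:                  # context-manager Pool (Python 3.3+)
            print(pool.map(obj.process_obj, range(8)))   # bound method pickles natively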
