how to use Multithreading in python and speed up code

感情迁移 提交于 2021-02-05 11:42:09

问题


I am trying to use multithreading in Python 3 to speed up the execution of some code.

Basically, I have to run the same function on each element of an iterable:

channels=range(1,8)

I have made a working example of what I am using so far, and I am testing it against a normal (sequential) execution.

I don't see any significant difference. Maybe I am doing something wrong.

A little help would be appreciated.

#!/usr/bin/env python


from threading import Thread

import matplotlib.pyplot as plt
import pdb
# from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing.pool import ThreadPool
import threading
import argparse
import logging
from types import SimpleNamespace
import numpy as np
import time
import inspect
import logging

# Module-level logger and a thread pool with 8 workers.
logger = logging.getLogger(__name__)
pool = ThreadPool(processes=8)


def myself():
    """Return the name of the function that called ``myself()``."""
    # Frame 0 is this function; frame 1 is the caller, whose name sits at index 3.
    return inspect.stack()[1][3]

class ThreadWithReturnValue(Thread):
    """Thread whose ``join()`` hands back the value returned by ``target``.

    The constructor accepts the same positional parameters as ``Thread``
    (``group``, ``target``, ``name``, ``args``, ``kwargs``). ``Verbose`` is
    accepted for backward compatibility and is not used.

    If no target is given, ``join()`` returns ``None``. ``join()`` also
    returns ``None`` when it is called with a timeout that expires before
    the target has finished.
    """

    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, Verbose=None):
        # ``kwargs`` defaults to None instead of a shared mutable ``{}``;
        # Thread.__init__ replaces None with a fresh dict.
        Thread.__init__(self, group, target, name, args, kwargs)
        self._return = None

    def run(self):
        """Call the target and remember its return value for ``join()``."""
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, timeout=None):
        """Wait for the thread (optionally up to ``timeout`` seconds).

        Returns:
            The value returned by the target, or ``None`` if there is no
            target or the target has not finished yet.
        """
        Thread.join(self, timeout)
        return self._return




#--------
def map_kg1_efit(data, chan):
    """Fill ``data.KG1LH_data.lid[chan]`` with a synthetic density array.

    This is a deliberately CPU-bound pure-Python workload used for timing:
    element ``it`` of the result equals ``it + sum(range(data.ntkg1v))``.

    Args:
        data: object exposing integer attributes ``ntefit`` and ``ntkg1v``
            and an indexable ``KG1LH_data.lid`` container.
        chan: index into ``data.KG1LH_data.lid`` where the result is stored.

    Returns:
        None. The result is written into ``data`` in place.
    """
    n_efit = data.ntefit
    # Size the buffer from the data instead of a hard-coded 968, so the
    # result always has one entry per EFIT index.
    density = np.zeros(n_efit)

    for it in range(n_efit):
        # Accumulate in a local float, then store once per outer iteration.
        total = float(it)
        for jj in range(data.ntkg1v):
            total += jj
        density[it] = total

    data.KG1LH_data.lid[chan] = density

# ----------------------------

def main():
    """Time ``map_kg1_efit`` per channel, first in a thread, then directly.

    For every channel 1..7 the function is run once through a
    ``ThreadWithReturnValue`` and once as a plain call. The elapsed time of
    each run is logged and the stored result is plotted after each run.
    """
    data = SimpleNamespace()
    data.ntkg1v = 30039
    data.ntefit = 968
    data.KG1LH_data = SimpleNamespace(lid=[[] for _ in range(8)])

    def show_channel(channel):
        # One new figure per call, shown straight away.
        plt.figure()
        plt.plot(range(0, data.ntefit), data.KG1LH_data.lid[channel])
        plt.show()

    for chan in range(1, 8):
        # Threaded run. The thread is joined right after it is started, so
        # only one worker thread is alive at any time.
        logger.info('computing channel {}'.format(chan))
        t0 = time.time()
        worker = ThreadWithReturnValue(target=map_kg1_efit, args=(data, chan))
        worker.start()
        worker.join()
        logger.info("--- {}s seconds ---".format(time.time() - t0))
        show_channel(chan)

        # Reference run: the same function called directly.
        logger.info('computing channel {}'.format(chan))
        t0 = time.time()
        map_kg1_efit(data, chan)
        logger.info("--- {}s seconds ---".format(time.time() - t0))
        show_channel(chan)

    logger.info("\n             Finished.\n")

if __name__ == "__main__":
    # Verbosity index -> logging level. Index 4 maps to the numeric level 5,
    # which is given the name "DEBUG_PLUS" below.
    debug_map = {
        0: logging.ERROR,
        1: logging.WARNING,
        2: logging.INFO,
        3: logging.DEBUG,
        4: 5,
    }

    # Run at INFO verbosity.
    logging.basicConfig(level=debug_map[2])
    logging.addLevelName(5, "DEBUG_PLUS")

    logger = logging.getLogger(__name__)

    # Call the main code
    main()

回答1:


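Threads do not speed up CPU-bound pure-Python loops in CPython, because the global interpreter lock (GIL) allows only one thread to execute Python bytecode at a time. For this CPU-bound task you can use multiprocessing.pool.Pool to get real parallelism across processes. Here's a reduced example that saturates all four cores on my system: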

import matplotlib.pyplot as plt          
from multiprocessing.pool import Pool    
from types import SimpleNamespace        
import numpy as np                       

def map_kg1_efit(arg):
    """Compute the synthetic density for one channel (pool-worker entry point).

    Args:
        arg: a ``(data, chan)`` pair. ``data`` exposes integer attributes
            ``ntefit`` and ``ntkg1v`` and an indexable ``KG1LH_data.lid``;
            ``chan`` is the index in ``lid`` that receives the result.

    Returns:
        The tuple ``(data, chan)``, with ``data.KG1LH_data.lid[chan]`` set to
        an array whose element ``it`` equals ``it + sum(range(data.ntkg1v))``.
    """
    data, chan = arg
    n_efit = data.ntefit
    # Size the buffer from the data instead of a hard-coded 968, so the
    # result always has one entry per EFIT index.
    density = np.zeros(n_efit)
    for it in range(n_efit):
        # Accumulate in a local float, then store once per outer iteration.
        total = float(it)
        for jj in range(data.ntkg1v):
            total += jj
        density[it] = total
    data.KG1LH_data.lid[chan] = density
    return (data, chan)

if __name__ == "__main__":
    # Input shared by all jobs: loop sizes plus eight empty result slots.
    data = SimpleNamespace()
    data.ntkg1v = 30039
    data.ntefit = 968
    data.KG1LH_data = SimpleNamespace()
    data.KG1LH_data.lid = [[] for _ in range(8)]

    # One (data, chan) job per channel 1..7, spread over 4 worker processes.
    jobs = [(data, chan) for chan in range(1, 8)]
    with Pool(4) as pool:
        results = pool.map(map_kg1_efit, jobs)

    # Plot from the (data, chan) pairs handed back by the workers; all
    # figures are created first and shown together at the end.
    for result_data, chan in results:
        plt.figure()
        plt.plot(range(0, result_data.ntefit), result_data.KG1LH_data.lid[chan])
    plt.show()


来源:https://stackoverflow.com/questions/56987313/how-to-use-multithreading-in-python-and-speed-up-code

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!