Question
I am trying to use multithreading in Python 3 to speed up some code execution.
Basically I have to run the same function on an iterable:
channels = range(1, 8)
I have made a working example of what I am using so far, and I am testing it against a normal (serial) execution.
I don't see any significant difference. Maybe I am doing something wrong.
A little help would be appreciated.
#!/usr/bin/env python
from threading import Thread
import matplotlib.pyplot as plt
import pdb
# from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing.pool import ThreadPool
import threading
import argparse
import logging
from types import SimpleNamespace
import numpy as np
import time
import inspect

logger = logging.getLogger(__name__)
myself = lambda: inspect.stack()[1][3]
pool = ThreadPool(processes=8)
class ThreadWithReturnValue(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs={}, Verbose=None):
        Thread.__init__(self, group, target, name, args, kwargs)
        self._return = None

    def run(self):
        print(type(self._target))
        if self._target is not None:
            self._return = self._target(*self._args,
                                        **self._kwargs)

    def join(self, *args):
        Thread.join(self, *args)
        return self._return
#--------
def map_kg1_efit(data, chan):
    density = np.zeros(968)
    for it in range(0, data.ntefit):
        density[it] = it
        for jj in range(0, data.ntkg1v):
            density[it] = density[it] + jj
    data.KG1LH_data.lid[chan] = density
# ----------------------------
def main():
    data = SimpleNamespace()
    data.KG1LH_data = SimpleNamespace()
    data.ntkg1v = 30039
    data.ntefit = 968
    data.KG1LH_data.lid = [[], [], [], [], [], [], [], []]

    channels = range(1, 8)
    # chan = 1
    for chan in channels:
        logger.info('computing channel {}'.format(chan))
        start_time = time.time()
        twrv = ThreadWithReturnValue(target=map_kg1_efit, args=(data, chan))
        # pdb.set_trace()
        twrv.start()
        twrv.join()
        logger.info("--- {} seconds ---".format(time.time() - start_time))
        plt.figure()
        plt.plot(range(0, data.ntefit), data.KG1LH_data.lid[chan])
        plt.show()

        logger.info('computing channel {}'.format(chan))
        start_time = time.time()
        map_kg1_efit(data, chan)
        logger.info("--- {} seconds ---".format(time.time() - start_time))
        plt.figure()
        plt.plot(range(0, data.ntefit), data.KG1LH_data.lid[chan])
        plt.show()

    logger.info("\n Finished.\n")
if __name__ == "__main__":
    debug_map = {0: logging.ERROR,
                 1: logging.WARNING,
                 2: logging.INFO,
                 3: logging.DEBUG,
                 4: 5}
    logging.basicConfig(level=debug_map[2])
    logging.addLevelName(5, "DEBUG_PLUS")
    logger = logging.getLogger(__name__)
    # Call the main code
    main()
Answer 1:
For this CPU-bound task, threads won't help: in CPython the GIL keeps pure-Python loops from running in parallel across threads. You can use multiprocessing.pool.Pool instead to get real parallelism with separate processes. Here's a reduced example that saturates all four cores on my system:
import matplotlib.pyplot as plt
from multiprocessing.pool import Pool
from types import SimpleNamespace
import numpy as np

def map_kg1_efit(arg):
    data = arg[0]
    chan = arg[1]
    density = np.zeros(968)
    for it in range(0, data.ntefit):
        density[it] = it
        for jj in range(0, data.ntkg1v):
            density[it] = density[it] + jj
    data.KG1LH_data.lid[chan] = density
    return (data, chan)

if __name__ == "__main__":
    data = SimpleNamespace()
    data.KG1LH_data = SimpleNamespace()
    data.ntkg1v = 30039
    data.ntefit = 968
    data.KG1LH_data.lid = [[], [], [], [], [], [], [], []]

    with Pool(4) as pool:
        results = pool.map(map_kg1_efit, [(data, chan) for chan in range(1, 8)])

    for r in results:
        plt.figure()
        plt.plot(range(0, r[0].ntefit), r[0].KG1LH_data.lid[r[1]])
        plt.show()
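
If you want to verify the speedup, a quick side-by-side timing might look like the sketch below (assuming it sits inside the same if __name__ == "__main__": block and reuses data and map_kg1_efit exactly as defined above):

import time

# Serial baseline: run every channel one after another in the main process
start = time.time()
for chan in range(1, 8):
    map_kg1_efit((data, chan))
print("serial: {:.2f} s".format(time.time() - start))

# Parallel: the same work split across 4 worker processes
start = time.time()
with Pool(4) as pool:
    pool.map(map_kg1_efit, [(data, chan) for chan in range(1, 8)])
print("pool:   {:.2f} s".format(time.time() - start))

The parallel run should finish noticeably faster on a multi-core machine, since each channel's loop is independent of the others.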
Source: https://stackoverflow.com/questions/56987313/how-to-use-multithreading-in-python-and-speed-up-code