I'm trying to find a way to download multiple files asynchronously in Python (2.6), preferably via the Requests module. Gevent and Twisted would also be acceptable, as I'll be learning them anyway.
You don't need any external library or framework for such a simple task: put the list of URLs in a queue, start 4 threads, and have each thread take an item from the queue and download it.

Something like this:
import sys
import os
import urllib
import threading
from Queue import Queue

class DownloadThread(threading.Thread):
    def __init__(self, queue, destfolder):
        super(DownloadThread, self).__init__()
        self.queue = queue
        self.destfolder = destfolder
        self.daemon = True  # don't keep the process alive once main exits

    def run(self):
        while True:
            url = self.queue.get()
            try:
                self.download_url(url)
            except Exception as e:
                print "   Error: %s" % e
            self.queue.task_done()

    def download_url(self, url):
        # derive the filename from the last URL segment;
        # change it to a different scheme if you require
        name = url.split('/')[-1]
        dest = os.path.join(self.destfolder, name)
        print "[%s] Downloading %s -> %s" % (self.ident, url, dest)
        urllib.urlretrieve(url, dest)

def download(urls, destfolder, numthreads=4):
    queue = Queue()
    for url in urls:
        queue.put(url)

    for i in range(numthreads):
        t = DownloadThread(queue, destfolder)
        t.start()

    # block until task_done() has been called for every queued URL
    queue.join()

if __name__ == "__main__":
    download(sys.argv[1:], "/tmp")
Usage:
$ python download.py http://en.wikipedia.org/wiki/1 http://en.wikipedia.org/wiki/2 http://en.wikipedia.org/wiki/3 http://en.wikipedia.org/wiki/4
[4456497152] Downloading http://en.wikipedia.org/wiki/1 -> /tmp/1
[4457033728] Downloading http://en.wikipedia.org/wiki/2 -> /tmp/2
[4457701376] Downloading http://en.wikipedia.org/wiki/3 -> /tmp/3
[4458258432] Downloading http://en.wikipedia.org/wiki/4 -> /tmp/4
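Since you mentioned Requests and gevent as options: here is a minimal sketch of the same fan-out using a gevent pool plus requests instead of threads. It assumes both libraries are installed; note that gevent's monkey-patching has to happen before requests is imported so its socket I/O becomes cooperative. Treat it as an outline, not a drop-in replacement:

from gevent import monkey
monkey.patch_all()  # must run before importing requests

import os
import sys
import requests
from gevent.pool import Pool

def download_url(url, destfolder="/tmp"):
    # same naming convention as above: last URL segment becomes the filename
    name = url.split('/')[-1]
    dest = os.path.join(destfolder, name)
    print "Downloading %s -> %s" % (url, dest)
    r = requests.get(url)
    with open(dest, "wb") as f:
        f.write(r.content)

if __name__ == "__main__":
    pool = Pool(4)  # at most 4 downloads in flight, like the 4 threads above
    for url in sys.argv[1:]:
        pool.spawn(download_url, url)
    pool.join()  # wait for all greenlets to finish

The pool size plays the same role as numthreads: it caps how many downloads run concurrently, which keeps you from opening hundreds of connections at once when the URL list is long.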