Question
I'm trying to write a script that downloads N files at the same time using the pysftp and multiprocessing libraries. I took a basic Python course, collected pieces of code, and combined them into a single script, but I can't get it to work. I'd appreciate it if somebody could help me with that. The error occurs after the vFtp.close() command, in the part that is supposed to start the simultaneous downloads.
from multiprocessing import Pool
import pysftp
import os
vHost='10.11.12.13'
vLogin='admin'
vPwd='pass1234'
vFtpPath='/export/home/'
os.chdir('d:/test/')
os.getcwd()
cnopts=pysftp.CnOpts()
cnopts.hostkeys = None
vFtp=pysftp.Connection(vHost,username=vLogin,password=vPwd,cnopts=cnopts)
vFtp.cwd(vFtpPath)
vObjectList=vFtp.listdir()
vFileList=[]
vFoldList=[]
for vObject in vObjectList:
    vType=str(vFtp.lstat(vObject))[:1]
    if vType!='d':
        vFileList.append(vObject)
    else:
        vFoldList.append(vObject)

vFtp.close()

def fDownload(vFileAux):
    vFtpAux=pysftp.Connection(vHost,username=vLogin,password=vPwd,cnopts=cnopts)
    vFtpAux.cwd(vFtpPath)
    vFtpAux.get(vFileAux,preserve_mtime=True)
    vFtpAux.close()

if __name__ == "__main__":
    vPool=Pool(3)
    vPool.map(fDownload,vFileList)
Answer 1:
It looks like you're trying to get the list of files and then download them concurrently using multiple processes.

Instead of manually examining the files, try using the walktree method on the connection object: pysftp walktree

Here is a working example I made in Python 3.5. I'm just using a local ftp server and small files, so I simulated a download delay. Change the max_workers argument to set the number of simultaneous downloads.
"""Demo using sftp to download files simultaneously."""
import pysftp
import os
from concurrent.futures import ProcessPoolExecutor
import time
def do_nothing(s):
"""
Using this as the callback for directories and unknown items found
using walktree.
"""
pass
def download(file):
"""
Simulates a 1-second download.
"""
with pysftp.Connection(
host='convox', username='abc', private_key='/home/abc/test') as sftp:
time.sleep(1)
print('Downloading {}'.format(file))
sftp.get(file)
def get_list_of_files(remote_dir):
"""
Walks remote directory tree and returns list of files.
"""
with pysftp.Connection(
host='convox', username='abc', private_key='/home/abc/test') as sftp:
files = []
# if this finds a file it will send the filename to the file callback
# which in this case just appends to the 'files' list
sftp.walktree(remote_dir, fcallback=files.append,
dcallback=do_nothing, ucallback=do_nothing)
return files
if __name__ == '__main__':
remote_dir = '/home/abc/remoteftp/'
download_target = '/home/abc/localftp/'
# if you don't specify a localpath in sftp.get then it just downloads to
# the os cwd, so set it here
os.chdir(download_target)
files = get_list_of_files(remote_dir)
pool = ProcessPoolExecutor(max_workers=4)
pool.map(download, files)
edit: ProcessPoolExecutor is for running something on multiple CPU cores and will be limited by your processor. For network tasks like downloading you can use threads instead. In the above code this is only one change: instead of ProcessPoolExecutor, import and use ThreadPoolExecutor. Then you can use more max_workers.
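For reference, a minimal sketch of that change, reusing the download and get_list_of_files functions from the example above (the max_workers value of 16 is just an arbitrary illustration, not a recommendation):

from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    remote_dir = '/home/abc/remoteftp/'
    download_target = '/home/abc/localftp/'
    os.chdir(download_target)

    files = get_list_of_files(remote_dir)
    # Threads instead of processes: downloading is I/O-bound, so the
    # worker count is not tied to the number of CPU cores.
    pool = ThreadPoolExecutor(max_workers=16)
    pool.map(download, files)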
Source: https://stackoverflow.com/questions/45653213/parallel-downloads-with-multiprocessing-and-pysftp