问题
I wrote a Python script that downloads files from SFTP servers using multithreading, so that it can connect to multiple servers at a time and download files from them in parallel.
It works fine for up to 10 connections, but with 25 connections the error shown below appears.
Suppose there are about 5000 files of roughly 130 MB each on every server to be downloaded.
The code often works successfully on subsequent tries, or it runs successfully for the first few files in the date range and then errors out in the middle of downloading all the files I need to retrieve (see error below).
What is the reason for the error, and how can I solve it? Thanks in advance.
My Code:
import sys, os, string, threading
import paramiko
import os
import pysftp
import csv
import socket
from stat import S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import lock
# Private key used for public-key authentication; shared by every worker.
privatekeyfile = os.path.expanduser("C:\\Users\\Rohan\\.ssh\\cool.prv")
mykey = paramiko.RSAKey.from_private_key_file(privatekeyfile)

# Connection options shared by every worker.
# NOTE(security): setting hostkeys to None turns off pysftp's host-key
# verification, so the identity of the servers is not checked. Load the
# expected host keys instead if the network is not fully trusted.
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
def _list_old_files(sftp, remotedir):
    """Return base names of regular files under *remotedir* at least a day old.

    Sub-directories are walked recursively.

    NOTE(review): only the base name is recorded, so a file found inside a
    sub-directory is later looked up directly under the top-level remote
    path. Confirm whether nested files are expected on the servers.
    """
    result = []
    for entry in sftp.listdir_attr(remotedir):
        remotepath = remotedir + "/" + entry.filename
        mode = entry.st_mode
        if S_ISDIR(mode):
            result += _list_old_files(sftp, remotepath)
        elif S_ISREG(mode):
            # Whole days since the last modification; > 0 means >= 24 hours.
            if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
                result.append(entry.filename)
    return result


def _read_logged_names(log_path):
    """Return the set of file names already recorded in the log file.

    A missing log file simply means nothing has been downloaded yet.
    """
    try:
        with open(log_path, "r") as log:
            return {line.replace("\n", "") for line in log}
    except FileNotFoundError:
        return set()


def _download(sftp, remotedir, remotefile, localdir):
    """Download *remotefile* from *remotedir* into *localdir*.

    A directory is recreated locally and its entries are downloaded
    recursively; a regular file is fetched with its modification time
    preserved.
    """
    remotepath = remotedir + "/" + remotefile
    localpath = os.path.join(localdir, remotefile)
    mode = sftp.stat(remotepath).st_mode
    if S_ISDIR(mode):
        try:
            os.mkdir(localpath, mode=0o777)
        except FileExistsError:
            # The directory is left over from an earlier run; reuse it.
            pass
        for child in sftp.listdir_attr(remotepath):
            _download(sftp, remotepath, child.filename, localpath)
    elif S_ISREG(mode):
        sftp.get(remotepath, localpath, preserve_mtime=True)


def _append_to_log(log_path, name):
    """Record one successfully downloaded file name in the log file."""
    with open(log_path, "a") as log:
        log.write("%s\n" % name)


def _normalise_log(log_path):
    """Rewrite the log file sorted and without duplicate lines.

    The file is created (empty) when it does not exist yet.
    """
    try:
        with open(log_path, "r") as log:
            unique_lines = set(log.readlines())
    except FileNotFoundError:
        unique_lines = set()
    with open(log_path, "w") as log:
        log.writelines(sorted(unique_lines))


def workon(serverad, user, textfile, serverpath, local_path_temp):
    """Download every not-yet-logged file from one SFTP server.

    Connects with the module-level ``mykey`` and ``cnopts``, lists the
    regular files under *serverpath* that are at least a day old, skips the
    names already present in the log file and downloads the rest into
    *local_path_temp*.

    Args:
        serverad: Address of the SFTP server.
        user: User name for the connection.
        textfile: Path of the log file holding the names of downloaded files;
            created if it does not exist.
        serverpath: Remote directory to scan.
        local_path_temp: Local directory that receives the files.

    Raises:
        OSError: Propagated from the connection, the transfer or local file
            access. Files completed before the failure stay in the log.
    """
    with pysftp.Connection(host=serverad, username=user,
                           private_key=mykey, cnopts=cnopts) as sftp:
        r = socket.gethostbyaddr(serverad)
        print("connection successful with ", r)

        candidates = _list_old_files(sftp, serverpath)
        already_logged = _read_logged_names(textfile)
        filtered_list = [name for name in candidates
                         if name not in already_logged]
        print("filtered list:", filtered_list)
        print(len(filtered_list))

        try:
            for name in filtered_list:
                _download(sftp, serverpath, name, local_path_temp)
                # Log immediately so a later failure does not lose the
                # record of files that were already transferred.
                _append_to_log(textfile, name)
        finally:
            # A transfer error must never be mistaken for a missing log
            # file, so the log is only tidied here, never truncated.
            _normalise_log(textfile)
def main(config_file_path="config15.txt"):
    """Start one download thread per valid config row and wait for them all.

    Every row of the CSV config file must hold exactly five fields, in this
    order: server address, user name, log file, remote path, local path.
    Rows with any other number of fields are skipped.

    Args:
        config_file_path: Path of the CSV config file.

    Raises:
        OSError: If the config file cannot be opened.
    """
    # Read the file once; the context manager guarantees the handle is closed.
    with open(config_file_path, "r") as config:
        config_lines = config.readlines()

    # Pause between thread starts: longer when the config has five or more
    # lines. The count does not change during the run, so compute it once.
    pause = 5 if len(config_lines) >= 5 else 1

    threads = []
    for row in csv.reader(config_lines):
        if len(row) != 5:
            continue
        server_ip, username, txt_file, server_path, local_path = row
        t = threading.Thread(
            target=workon,
            args=(server_ip, username, txt_file, server_path, local_path),
        )
        t.start()
        threads.append(t)
        sleep(pause)

    for t in threads:
        t.join()
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
Error:
Traceback (most recent call last):
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 98, in workon
process()
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 80, in process
compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 77, in compare_files
sftp.get(remotepath, localpath, preserve_mtime=True)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\pysftp\__init__.py", line 249, in get
self._sftp.get(remotepath, localpath, callback=callback)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\paramiko\sftp_client.py", line 806, in get
"size mismatch in get! {} != {}".format(s.st_size, size)
OSError: size mismatch in get! 0 != 275856
来源:https://stackoverflow.com/questions/56561965/oserror-size-mismatch-in-get-when-retrieving-files-via-sftp-using-python