“OSError: size mismatch in get!” when retrieving files via SFTP using python

耗尽温柔 提交于 2019-12-08 13:24:52

问题


I wrote a python script to download files from SFTP server using python using multithreading so that it could connect to multiple servers at a time and download files from them parallelly.

It works fine for up to 10 connections but if there are 25 connections then this error shows up

suppose there are 5000 files of size 130mb(almost) on each server to be downloaded

The code often works successfully on subsequent tries or will run successfully for the first few files in the date range and then error out in the middle of downloading all the files I need to retrieve(see error below)

whats reason of the error and please help me to solve it. Thanks in advance

My Code:

import sys, os, string, threading
import paramiko
import os
import pysftp
import csv
import socket
from stat import S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import lock

threading.Lock()

privatekeyfile = os.path.expanduser("C:\\Users\\Rohan\\.ssh\\cool.prv")          # public key authentication
mykey = paramiko.RSAKey.from_private_key_file(privatekeyfile)

cnopts = pysftp.CnOpts()
cnopts.hostkeys = None


def workon(serverad,user, textfile, serverpath, local_path_temp):

            with pysftp.Connection(host=serverad, username=user,private_key=mykey, cnopts=cnopts) as sftp:
                r = (socket.gethostbyaddr(serverad))
                print("connection successful with ",r)          # connect to SFTP server

                def get_file2(sftp ,remotedir):          # make the list of all the files on server path of last 2 days
                    result = []
                    for entry in sftp.listdir_attr(remotedir):
                        remotepath = remotedir + "/" + entry.filename
                        mode = entry.st_mode
                        if S_ISDIR(mode):
                            result += get_file2(sftp, remotepath)
                        elif S_ISREG(mode):
                            if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
                                result.append(entry.filename)

                    return result


                remote_path = serverpath
                d = get_file2(sftp, remote_path)


                def process():
                    myname = []                 # make list of files already in log file

                    for name in j.readlines():
                        while '\n' in name:
                            name = name.replace('\n', '')
                            myname.append(name)


                    import os

                    filtered_list = [string for string in d if string not in myname]    #  make filtered list of files preent on server but not in local path
                    print("filtered list:", filtered_list)
                    print(len(filtered_list))

                    local_path = local_path_temp

                    def compare_files(sftp, remotedir, remotefile, localdir, preserve_mtime=True):
                        remotepath = remotedir + "/" + remotefile    # download the files inside filtered list
                        localpath = os.path.join(localdir, remotefile)
                        mode = sftp.stat(remotepath).st_mode
                        if S_ISDIR(mode):
                            try:
                                os.mkdir(localpath, mode=777)
                            except OSError:
                                pass
                            compare_files(sftp, remotepath, localpath, preserve_mtime)
                        elif S_ISREG(mode):
                            sftp.get(remotepath, localpath, preserve_mtime=True)

                    for files in filtered_list:
                        compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)

                    with open(filename, 'a') as f:  # append the downloaded file name in the log file
                        for item in filtered_list:
                            f.write("%s\n" % item)

                    with open(filename, 'r') as f:
                        unique_lines = set(f.readlines())
                        h = sorted(unique_lines)
                    with open(filename, 'w') as f:
                        f.writelines(h)

                filename=textfile   # if the log file does not exist it can create a new log file
                try:
                    with open(filename, 'r+') as j:
                        process()
                except IOError:
                    with open(filename, 'w+') as j:
                        process()


def main():
    threads=[]
    config_file_path = "config15.txt"           # read config file and assign IP,username,logfile,serverpath,localpath
    file = open(config_file_path, 'r')
    reader = csv.reader(file)
    all_rows = [row for row in reader]
    for line in all_rows:
        if len(line) != 5:
            continue
        server_ip = line[0]
        username = line[1]
        txt_file = line[2]
        server_path = line[3]
        local_path = line[4]
        t = threading.Thread(target=workon, args=(server_ip, username, txt_file, server_path, local_path)) # use of multithreading
        t.start()
        threads.append(t)
        with open(config_file_path) as f:              # adjust sleep time according to connections in config file
            if(sum(1 for _ in f)) >= 5:
                sleep(5)
            else:
                sleep(1)

    for t in threads:
        t.join()


if __name__ == "__main__":      # execute main function
    main()

Error:

Traceback (most recent call last):
  File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 98, in workon
    process()
  File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 80, in process
    compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
  File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 77, in compare_files
    sftp.get(remotepath, localpath, preserve_mtime=True)
  File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\pysftp\__init__.py", line 249, in get
    self._sftp.get(remotepath, localpath, callback=callback)
  File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\paramiko\sftp_client.py", line 806, in get
    "size mismatch in get!  {} != {}".format(s.st_size, size)
OSError: size mismatch in get!  0 != 275856

来源:https://stackoverflow.com/questions/56561965/oserror-size-mismatch-in-get-when-retrieving-files-via-sftp-using-python

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!