Download a file to a specific path using Selenium WebDriver

前端 未结 3 1395
春和景丽
春和景丽 2021-01-15 13:54

I need to download a file to a given location on a non-local machine. This is the normal browser flow I would follow to do this:

  • Go to website
3条回答
  •  借酒劲吻你
    2021-01-15 14:45

    When you initialize your driver, be sure to set the download preferences.

    For Firefox:

    # Download-manager preferences, applied in one pass over (name, value) pairs.
    for pref_name, pref_value in (
        ( "browser.download.manager.showWhenStarting", False ),
        # NOTE(review): 2 is understood to mean "use browser.download.dir" - confirm against the Firefox docs
        ( "browser.download.folderList", 2 ),
        ( "browser.download.useDownloadDir", True ),
    ):
        ff_prof.set_preference( pref_name, pref_value )
    ff_prof.set_preference( "browser.download.dir", self.driver_settings['download_folder'] )

    ##
    # If Firefox still shows the download dialog, make sure the file's MIME type is listed below.
    # MIME type strings can be found in '~/.mozilla/$USER_PROFILE/mimeTypes.rdf'.
    ##
    mime_types = ("application/pdf", "text/html")
    joined_mime_types = ", ".join( mime_types )

    # Both "neverAsk" preferences receive the same comma-separated MIME type list.
    for pref_name in ( "browser.helperApps.neverAsk.saveToDisk", "browser.helperApps.neverAsk.openFile" ):
        ff_prof.set_preference( pref_name, joined_mime_types )
    

    For Chrome:

    # Grab the nested prefs mapping once, then set both download options on it.
    chrome_prefs = capabilities['chromeOptions']['prefs']
    chrome_prefs['download.prompt_for_download'] = False
    chrome_prefs['download.default_directory'] = self.driver_settings['download_folder']
    

    Forwarding the download:

    Below is the code I use to move the file from self.driver_settings['download_folder'] (set above) to where you actually want it (to_path can be an existing folder or a file path). If you're on Linux, I'd suggest using tmpfs so that /tmp is held in RAM, and then setting self.driver_settings['download_folder'] to "/tmp/driver_downloads/". Note that the function below assumes that self.driver_settings['download_folder'] always starts out as an empty folder (this is how it locates the file being downloaded, since it is the only one in the directory).

    def moveDriverDownload(self, to_path, allowable_extensions, allow_rename_if_exists=False, timeout_seconds=None):
        """Wait for the browser download to finish, then move it to ``to_path``.

        The download is located by scanning ``self.driver_settings['download_folder']``
        for a file whose name ends with one of ``allowable_extensions`` (or with
        ".part"), so that folder is expected to contain nothing but the file
        being downloaded.

        Args:
            to_path: Destination; either an existing directory (the downloaded
                file keeps its name) or a full file path. Backslashes are
                converted to forward slashes.
            allowable_extensions: list/tuple/set of extensions, with or without
                the leading dot. Matching is case-insensitive.
            allow_rename_if_exists: When the destination already exists, append
                "-2", "-3", ... to the file name instead of raising.
            timeout_seconds: Overall time budget shared by both waiting phases;
                defaults to 30.

        Returns:
            The final file name (the part of the destination after the last "/").

        Raises:
            TypeError: ``allowable_extensions`` is not a list, tuple or set.
            DownloadTimeoutError: no matching file appeared in time.
            Exception: unexpected files in the download folder, unsupported
                platform, lsof/handle wrote to stderr, the file was still open
                when the timeout expired, or the destination exists and
                renaming is not allowed.
        """
        if timeout_seconds is None:
            timeout_seconds = 30
        wait_delta = timedelta( seconds=timeout_seconds )
        start_download_time = datetime.now()
        hasTimedOut = lambda: datetime.now() - start_download_time > wait_delta

        # raise rather than assert so the check is not stripped under `python -O`
        if not isinstance( allowable_extensions, (list, tuple, set) ):
            raise TypeError( "instead of a list, found allowable_extensions type of '{}'".format(type(allowable_extensions)) )
        allowable_extensions = [ elem.lower().strip() for elem in allowable_extensions ]
        allowable_extensions = [ elem if elem.startswith(".") else "."+elem for elem in allowable_extensions ]

        # ".part" files are accepted too so an in-progress download is detected
        if ".part" not in allowable_extensions:
            allowable_extensions.append( ".part" )

        # the extensions were lower-cased above, so match file names case-insensitively
        re_extension = re.compile( "(?:" + "|".join( re.escape(elem) for elem in allowable_extensions ) + ")$", re.IGNORECASE )

        # a download folder that does not exist yet is treated as empty instead of leaking StopIteration
        getFiles = lambda: next( os.walk( self.driver_settings['download_folder'] ), (None, None, []) )[2]

        while True:
            if hasTimedOut():
                requested_extensions = [ elem for elem in allowable_extensions if elem != ".part" ]
                raise DownloadTimeoutError( "timed out after {} seconds while waiting on file download with extension in {}".format(timeout_seconds, requested_extensions) )

            time.sleep( 0.5 )

            file_list = [ elem for elem in getFiles() if re_extension.search( elem ) ]
            if len(file_list) > 0:
                break

        # "name.ext" and "name.ext.part" refer to the same download; drop the ".part" suffix
        file_list = [ re.sub( r"(?i)\.part$", "", elem ) for elem in file_list ]

        if len(file_list) > 1:
            if len(file_list) == 2:
                if file_list[0] != file_list[1]:
                    raise Exception( "file_list[0] != file_list[1] <==> {} != {}".format(file_list[0], file_list[1]) )
            else:
                raise Exception( "len(file_list) > 1. found {}".format(file_list) )

        # os.path.join works whether or not the configured folder ends with a separator
        file_path = os.path.join( self.driver_settings['download_folder'], file_list[0] )

        # see if the file is still being downloaded by checking if it's open by any programs.
        # The commands are argument lists run without a shell, so characters in the
        # (server-chosen) file name can neither be interpreted by the shell nor as a regex.
        if platform.system() == "Linux":
            command = [ "lsof" ]
            fileIsFinished = lambda txt: not any( file_path in line for line in txt.splitlines() )
        elif platform.system() == "Windows":
            # 'handle' program must be in PATH
            # https://technet.microsoft.com/en-us/sysinternals/bb896655
            command = [ "handle", file_path.replace("/", "\\") ]
            fileIsFinished = lambda txt: bool( re.search("(?i)No matching handles found", txt) )
        else:
            raise Exception( "unrecognised platform.system() of '{}'".format(platform.system()) )

        # communicate() yields bytes on Python 3; normalise to text so the comparisons below work
        toText = lambda data: data if isinstance(data, str) else data.decode("utf-8", "replace")

        while True:
            lsof_process = subprocess.Popen( command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE )
            lsof_result = tuple( toText(part) for part in lsof_process.communicate() )

            if lsof_result[1].strip() != "":
                raise Exception( 'lsof_result[1].strip() != "". found {}'.format(lsof_result) )
            if fileIsFinished( lsof_result[0] ):
                break

            if hasTimedOut():
                raise Exception( "timed out after {} seconds waiting for '{}' to be freed from writing. found lsof/handle of '{}'".format(timeout_seconds, file_path, lsof_result[0]) )

            time.sleep( 0.5 )

        to_path = to_path.replace("\\", "/")
        if os.path.isdir( to_path ):
            if not to_path.endswith("/"):
                to_path += "/"

            to_path += file_list[0]

        i = 2
        while os.path.exists( to_path ):
            if not allow_rename_if_exists:
                raise Exception( "{} already exists".format(to_path) )

            # swap a previous "-<i-1>" suffix for "-<i>", inserted before the first dot of the file name;
            # the directory group may be empty so a bare file name is renamed too instead of looping forever
            to_path = re.sub( r"^(.*/|)(.*?)(?:-" + str(i-1) + r")?(|\..*?)?$", r"\1\2-%i\3" %i, to_path )
            i += 1

        shutil.move( file_path, to_path )

        # rsplit (unlike rindex) also copes with a destination that contains no "/"
        return to_path.rsplit( "/", 1 )[-1]
    

提交回复
热议问题