I need to download a file to a given location on a non-local machine. This is the normal flow of the web browser for which I would do this:
When you initialize your driver, be sure to set the download preferences.
For Firefox:
# Send downloads straight to a fixed folder without opening the download manager.
ff_prof.set_preference("browser.download.manager.showWhenStarting", False)
ff_prof.set_preference("browser.download.folderList", 2)
ff_prof.set_preference("browser.download.useDownloadDir", True)
ff_prof.set_preference("browser.download.dir", self.driver_settings['download_folder'])

# If Firefox still shows the download dialog, add the file's MIME type to the
# tuple below. The available type strings are listed in
# '~/.mozilla/$USER_PROFILE/mimeTypes.rdf'.
mime_types = ("application/pdf", "text/html")
never_ask_types = ", ".join(mime_types)
ff_prof.set_preference("browser.helperApps.neverAsk.saveToDisk", never_ask_types)
ff_prof.set_preference("browser.helperApps.neverAsk.openFile", never_ask_types)
For Chrome:
# Chrome keeps its download settings under chromeOptions -> prefs.
chrome_prefs = capabilities['chromeOptions']['prefs']
# Never prompt; always save into the configured download folder.
chrome_prefs['download.prompt_for_download'] = False
chrome_prefs['download.default_directory'] = self.driver_settings['download_folder']
Forwarding the download:
Below is the code I use to move the file from self.driver_settings['download_folder'] (set above) to where you actually want it (to_path can be an existing folder or a file path). If you're on Linux, I'd suggest using tmpfs so that /tmp is held in RAM, and then setting self.driver_settings['download_folder'] to "/tmp/driver_downloads/". Note that the function below assumes that self.driver_settings['download_folder'] always begins as an empty folder (this is how it locates the file being downloaded, since it's the only one in the directory).
def moveDriverDownload(self, to_path, allowable_extensions, allow_rename_if_exists=False, timeout_seconds=None):
    """Wait for the browser's download to finish, then move it to ``to_path``.

    The download is located by scanning ``self.driver_settings['download_folder']``
    for a file whose name ends in one of ``allowable_extensions`` or in
    ``.part`` (the suffix Firefox gives to in-progress downloads). The folder
    is expected to start out empty, so the download is the only file in it.
    Once found, the function polls ``lsof`` (Linux) or Sysinternals ``handle``
    (Windows, must be on PATH) until no process has the file open, then moves
    it.

    Args:
        to_path: Destination. Either an existing directory (the downloaded
            file keeps its name) or a full file path. Backslashes are
            normalised to forward slashes.
        allowable_extensions: list/tuple/set of extensions, with or without
            the leading dot; matching is case-insensitive.
        allow_rename_if_exists: When the destination already exists, append
            ``-2``, ``-3``, ... before the extension instead of raising.
        timeout_seconds: Overall budget shared by both waiting phases
            (defaults to 30).

    Returns:
        The file name (not the full path) the download was moved to.

    Raises:
        TypeError: ``allowable_extensions`` is not a list, tuple or set.
        DownloadTimeoutError: no matching file appeared in time.
        Exception: ambiguous download folder contents, unsupported platform,
            ``lsof``/``handle`` wrote to stderr, the file stayed open past
            the timeout, or the destination exists and renaming is disabled.
    """
    # Explicit check instead of ``assert``: asserts vanish under ``python -O``
    # and a bare string would then be silently iterated character by character.
    if not isinstance(allowable_extensions, (list, tuple, set)):
        raise TypeError("instead of a list, found allowable_extensions type of '{}'".format(type(allowable_extensions)))

    if timeout_seconds is None:
        timeout_seconds = 30
    wait_delta = timedelta(seconds=timeout_seconds)
    start_download_time = datetime.now()

    def has_timed_out():
        return datetime.now() - start_download_time > wait_delta

    # Normalise extensions to lower-case with a leading dot.
    wanted_extensions = []
    for extension in allowable_extensions:
        extension = extension.lower().strip()
        if not extension.startswith("."):
            extension = "." + extension
        wanted_extensions.append(extension)
    watched_suffixes = tuple(wanted_extensions)
    if ".part" not in watched_suffixes:
        watched_suffixes += (".part",)

    download_folder = self.driver_settings['download_folder']

    def list_candidates():
        # os.walk's first triple holds the folder's plain files; the default
        # makes a not-yet-created folder look empty instead of raising
        # StopIteration.
        filenames = next(os.walk(download_folder), (None, [], []))[2]
        return [name for name in filenames if name.lower().endswith(watched_suffixes)]

    # Phase 1: wait for the download to show up.
    while True:
        file_list = list_candidates()
        if file_list:
            break
        if has_timed_out():
            reported_extensions = [extension for extension in wanted_extensions if extension != ".part"]
            raise DownloadTimeoutError("timed out after {} seconds while waiting on file download with extension in {}".format(timeout_seconds, reported_extensions))
        time.sleep(0.5)

    # "name.ext" and "name.ext.part" refer to the same download.
    file_list = [name[:-len(".part")] if name.lower().endswith(".part") else name for name in file_list]
    if len(file_list) > 1:
        if len(file_list) == 2:
            if file_list[0] != file_list[1]:
                raise Exception("file_list[0] != file_list[1] <==> {} != {}".format(file_list[0], file_list[1]))
        else:
            raise Exception("len(file_list) > 1. found {}".format(file_list))

    # os.path.join copes with a download folder given without a trailing slash.
    file_path = os.path.join(download_folder, file_list[0])

    # Phase 2: the download is finished once no program has the file open.
    # Commands are argument lists (no shell), so quotes or shell
    # metacharacters in the path cannot break or hijack the command.
    system_name = platform.system()
    if system_name == "Linux":
        command = ["lsof"]

        def open_handles(output):
            # Substring match also catches the sibling "<file>.part".
            matches = [line for line in output.splitlines() if file_path in line]
            return "\n".join(matches) if matches else None
    elif system_name == "Windows":
        # 'handle' program must be in PATH
        # https://technet.microsoft.com/en-us/sysinternals/bb896655
        command = ["handle", file_path.replace("/", "\\")]

        def open_handles(output):
            if re.search("(?i)No matching handles found", output):
                return None
            return output
    else:
        raise Exception("unrecognised platform.system() of '{}'".format(system_name))

    while True:
        # universal_newlines=True yields text on Python 2 and 3; with bytes
        # the comparison against "" below would always be true on Python 3.
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
        stdout_text, stderr_text = process.communicate()
        if stderr_text.strip() != "":
            raise Exception('lsof_result[1].strip() != "". found {}'.format(stderr_text))
        still_open = open_handles(stdout_text)
        if still_open is None:
            break
        if has_timed_out():
            raise Exception("timed out after {} seconds waiting for '{}' to be freed from writing. found lsof/handle of '{}'".format(timeout_seconds, file_path, still_open))
        time.sleep(0.5)

    # Resolve the destination path.
    to_path = to_path.replace("\\", "/")
    if os.path.isdir(to_path):
        if not to_path.endswith("/"):
            to_path += "/"
        to_path += file_list[0]

    if os.path.exists(to_path):
        if not allow_rename_if_exists:
            raise Exception("{} already exists".format(to_path))
        # Insert "-N" before the first dot of the file name ("a.tar.gz" ->
        # "a-2.tar.gz"). rpartition also handles a bare file name with no
        # directory part, which previously looped forever.
        folder, separator, filename = to_path.rpartition("/")
        stem, dot, remainder = filename.partition(".")
        counter = 2
        to_path = folder + separator + stem + "-" + str(counter) + dot + remainder
        while os.path.exists(to_path):
            counter += 1
            to_path = folder + separator + stem + "-" + str(counter) + dot + remainder

    shutil.move(file_path, to_path)
    return to_path.rpartition("/")[2]
Use selenium webdriver
Use a Firefox profile to download your files. This profile skips Firefox's download dialog box. In the line:
// Preference names are case-sensitive: "download", not "downLoad".
pro.setPreference("browser.download.folderList", 0);
The value of browser.download.folderList can be set to either 0, 1, or 2. When set to 0, Firefox will save all files downloaded via the browser on the user's desktop. When set to 1, these downloads are stored in the Downloads folder. When set to 2, the location specified for the most recent download is utilized again.
Firefox profile code that you need to implement :-
// Profile that lets Firefox save the file without showing the download dialog.
FirefoxProfile pro = new FirefoxProfile();
// 0 = save downloads to the desktop. Preference names are case-sensitive
// ("download", not "downLoad"); a misspelled key is silently ignored.
pro.setPreference("browser.download.folderList", 0);
// MIME types listed here are saved to disk without asking; the registered
// type for ZIP archives is "application/zip".
pro.setPreference("browser.helperApps.neverAsk.saveToDisk", "application/zip");
WebDriver driver = new FirefoxDriver(pro);
driver.get("http://selenium-release.storage.googleapis.com/2.47/selenium-java-2.47.1.zip");
Hope it will help you :)
You would have to examine the javascript on the website and understand how it works before you could override it to do something like that, but even then, browser security will always pop a dialog asking you to confirm the download. That leaves you with two options (as far as I can see):
I can't really help with the details on either, since I don't know python, but hopefully that helps...