Download file using HtmlUnit

后端 未结 6 1935
一生所求
一生所求 2021-02-05 23:17

I am trying to download an xls file from a website. When I click the link to download the file, I get a JavaScript confirm box. I handle it like below:

    ConfirmHan         


        
相关标签:
6条回答
  • 2021-02-05 23:39

    Figure out the download URLs and collect the matching elements in a List. From each download URL we can then fetch the entire file using this code.

        try{
            String path = "your destination path";
            List<HtmlElement> downloadfiles = (List<HtmlElement>) page.getByXPath("the tag you want to scrape");
            if (downloadfiles.isEmpty()) {
                System.out.println("No items found !");
            } else {
                for (HtmlElement htmlItem : downloadfiles) {
                    String DownloadURL = htmlItem.getHrefAttribute();
    
                    Page invoicePdf = client.getPage(DownloadURL);
                    if (invoicePdf.getWebResponse().getContentType().equals("application/pdf")) {
                        System.out.println("creatign PDF:");
                        IOUtils.copy(invoicePdf.getWebResponse().getContentAsStream(),
                                new FileOutputStream(path + "file name"));
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    
    0 讨论(0)
  • 2021-02-05 23:46

    I found a way to get InputStream using WebWindowListener. Inside of webWindowContentChanged(WebWindowEvent event), I put code below.

    InputStream xls = event.getWebWindow().getEnclosedPage().getWebResponse().getContentAsStream();
    

    After I get xls, I could save the file into my hard disk.

    0 讨论(0)
  • 2021-02-05 23:51

    I made it based on your post. Note: you can change the content-type condition to download only specific types of file (e.g. application/octet-stream, application/pdf, etc.).

    package net.s4bdigital.export.main;
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.util.List;
    
    import org.junit.Before;
    import org.junit.Test;
    import org.openqa.selenium.By;
    import org.openqa.selenium.WebDriver;
    import org.openqa.selenium.htmlunit.HtmlUnitDriver;
    
    import com.gargoylesoftware.htmlunit.ConfirmHandler;
    import com.gargoylesoftware.htmlunit.Page;
    import com.gargoylesoftware.htmlunit.WebClient;
    import com.gargoylesoftware.htmlunit.WebResponse;
    import com.gargoylesoftware.htmlunit.WebWindowEvent;
    import com.gargoylesoftware.htmlunit.WebWindowListener;
    import com.gargoylesoftware.htmlunit.util.NameValuePair;
    
    public class HtmlUnitDownloadFile {
    
        protected String baseUrl;
        protected static WebDriver driver;
    
        @Before
        public void openBrowser() {
            baseUrl = "http://localhost/teste.html";
            driver = new CustomHtmlUnitDriver();
            ((HtmlUnitDriver) driver).setJavascriptEnabled(true);
    
        }
    
    
        @Test
        public void downloadAFile() throws Exception {
    
            driver.get(baseUrl);
            driver.findElement(By.linkText("click to Downloadfile")).click();
    
        }
    
        public class CustomHtmlUnitDriver extends HtmlUnitDriver { 
    
              // This is the magic. Keep a reference to the client instance 
               protected WebClient modifyWebClient(WebClient client) { 
    
    
                 ConfirmHandler okHandler = new ConfirmHandler(){
                        public boolean handleConfirm(Page page, String message) {
                            return true;
                        }
                 };
                 client.setConfirmHandler(okHandler);
    
                 client.addWebWindowListener(new WebWindowListener() {
    
                    public void webWindowOpened(WebWindowEvent event) {
                        // TODO Auto-generated method stub
    
                    }
    
                    public void webWindowContentChanged(WebWindowEvent event) {
    
                        WebResponse response = event.getWebWindow().getEnclosedPage().getWebResponse();
                        System.out.println(response.getLoadTime());
                        System.out.println(response.getStatusCode());
                        System.out.println(response.getContentType());
    
                        List<NameValuePair> headers = response.getResponseHeaders();
                        for(NameValuePair header: headers){
                            System.out.println(header.getName() + " : " + header.getValue());
                        }
    
                        // Change or add conditions for content-types that you would to like 
                        // receive like a file.
                        if(response.getContentType().equals("text/plain")){
                            getFileResponse(response, "target/testDownload.war");
                        }
    
    
    
                    }
    
                    public void webWindowClosed(WebWindowEvent event) {
    
    
    
                    }
                });          
    
                 return client; 
               } 
    
    
        } 
    
        public static void getFileResponse(WebResponse response, String fileName){
    
            InputStream inputStream = null;
    
            // write the inputStream to a FileOutputStream
            OutputStream outputStream = null; 
    
            try {       
    
                inputStream = response.getContentAsStream();
    
                // write the inputStream to a FileOutputStream
                outputStream = new FileOutputStream(new File(fileName));
    
                int read = 0;
                byte[] bytes = new byte[1024];
    
                while ((read = inputStream.read(bytes)) != -1) {
                    outputStream.write(bytes, 0, read);
                }
    
                System.out.println("Done!");
    
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (inputStream != null) {
                    try {
                        inputStream.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
                if (outputStream != null) {
                    try {
                        // outputStream.flush();
                        outputStream.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
    
                }
            }
    
        }
    
    }
    
    0 讨论(0)
  • 2021-02-05 23:56
     final WebClient webClient = new WebClient(BrowserVersion.CHROME);
            webClient.getOptions().setTimeout(2000);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

            // Accept every JavaScript confirm() dialog raised while downloading.
            webClient.setConfirmHandler(new ConfirmHandler() {
                public boolean handleConfirm(Page page, String message) {
                    return true;
                }
            });

            // Load the outer page.
            final HtmlPage page = webClient.getPage("http://your");

            // Give background scripts time to finish; this only has an effect once
            // a page has been loaded, so it must come after getPage().
            webClient.waitForBackgroundJavaScript(2000);

            // Get the page enclosed in the frame that holds the download element.
            final HtmlPage frame = ((HtmlPage)
            page.getFrameByName("Frame").getEnclosedPage());

            // Look the element up in the frame page declared above.
            final DomElement file = frame.getElementByName("File");

            // Clicking returns the resulting page; its response body is the file.
            final InputStream xls =  file.click().getWebResponse().getContentAsStream();

            assertNotNull(xls);
        }
    
    0 讨论(0)
  • 2021-02-05 23:59

    There's an easier way if you're not into wrapping HtmlUnit with Selenium. Simply provide HtmlUnit's WebClient with the extended WebWindowListener.

    You could also use Apache commons.io for easy stream copying.

    WebClient webClient = new WebClient();
    // Saves the body of any matching response that replaces a window's content.
    webClient.addWebWindowListener(new WebWindowListener() {
        @Override
        public void webWindowOpened(WebWindowEvent event) { }

        @Override
        public void webWindowContentChanged(WebWindowEvent event) {
            // The response has to be taken from the page now enclosed in the window.
            WebResponse response = event.getWebWindow().getEnclosedPage().getWebResponse();

            // Change or add conditions for the content types that should be
            // received as a file.
            if ("text/plain".equals(response.getContentType())) {
                // IOUtils.copy leaves its arguments open, so close both streams here.
                try (java.io.InputStream in = response.getContentAsStream();
                     FileOutputStream out = new FileOutputStream("downloaded_file")) {
                    IOUtils.copy(in, out);
                } catch (IOException e) {
                    // Also covers FileNotFoundException, which is an IOException.
                    e.printStackTrace();
                }
            }
        }

        @Override
        public void webWindowClosed(WebWindowEvent event) { }
    });
    
    0 讨论(0)
  • 2021-02-06 00:02

    Expanding on Roy's answer, here's my solution to this problem:

    /**
     * Registers a one-shot listener that saves the next non-HTML response to a file.
     *
     * <p>When a window's content changes to an {@code UnexpectedPage}, its input
     * stream is copied into {@code output} and the listener removes itself from the
     * client. I/O failures are printed and the listener stays registered.
     *
     * @param webClient client on which the listener is registered
     * @param output    file that receives the downloaded content
     */
    public static void prepareForDownloadingFile(final WebClient webClient, final File output) {
        webClient.addWebWindowListener(new WebWindowListener() {

            @Override
            public void webWindowOpened(WebWindowEvent event) {
            }

            @Override
            public void webWindowContentChanged(WebWindowEvent event) {
                Page page = event.getNewPage();
                // instanceof is already false for null, so no separate null check.
                if (!(page instanceof UnexpectedPage)) {
                    return;
                }

                UnexpectedPage uPage = (UnexpectedPage) page;
                // try-with-resources closes each stream independently, so a failure
                // while closing one can no longer leave the other open.
                try (InputStream is = uPage.getInputStream();
                     FileOutputStream fos = new FileOutputStream(output)) {
                    IOUtils.copy(is, fos);
                    // Download finished: unregister so later pages are not captured.
                    webClient.removeWebWindowListener(this);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            @Override
            public void webWindowClosed(WebWindowEvent event) {
            }
        });
    }
    

    I felt there were enough differences to make it a new answer:
    -Doesn't have a magic variable (response)
    -Closes InputStream and FileOutputStream
    -Looks for UnexpectedPage to determine we're not on a HTML page
    -Downloads a file one time after requesting then removes itself
    -Doesn't require knowing the ContentType

    Calling this once before, for example, clicking a button that initiates a download, will download that file.

    0 讨论(0)
提交回复
热议问题