I've been running the following code in order to download a csv file from the website http://niftyindices.com/resources/holiday-calendar:
I needed to download a file from behind a login, which was being handled by Puppeteer. `targetcreated` was not being triggered. In the end I downloaded the file with `request`, after copying the cookies over from the Puppeteer instance.
In this case, I'm streaming the file through to the client, but you could just as easily save it to disk.
// Stream a login-protected file to the client by replaying the Puppeteer
// session's cookies through `request`.
// NOTE(review): `page`, `request` and `res` come from the surrounding handler,
// which is not shown here; `res.send` presumably is the Express helper — confirm.

// Copy every cookie of the authenticated page into a jar `request` can use.
const cookies = await page.cookies();
const jar = request.jar();
for (const cookie of cookies) {
  jar.setCookie(`${cookie.name}=${cookie.value}`, "http://secretsite.com");
}

// `.pipe()` returns the destination stream rather than a promise, so awaiting
// it inside try/catch never observes a failed transfer. Failures are reported
// through the 'error' event instead.
const download = request({ url: "http://secretsite.com/secretfile.jpg", jar });

// Send the download headers only once the upstream has answered, so that an
// early failure (DNS, refused connection, ...) can still produce an error reply.
download.on('response', () => {
  res.writeHead(200, {
    "Content-Type": 'application/octet-stream',
    "Content-Disposition": `attachment; filename=secretfile.jpg`
  });
});

download.on('error', (err) => {
  console.trace(err);
  if (res.headersSent) {
    // The file is already partly streamed; the only honest signal left is to
    // abort the response.
    res.destroy(err);
    return;
  }
  // An Error object serialises to `{}`; report its message instead.
  res.send({ status: "error", message: err.message });
});

download.pipe(res);
I found a way to wait for the browser's own download mechanism to fetch a file. The idea is to wait for a response that matches a predicate; in my case, the URL ends with '/data'.
I just didn't want to load the file contents into a buffer.
// Let the browser itself save the download into `download_path`, then trigger
// the download and wait until the matching response has arrived.
// NOTE(review): `page`, `frame`, `download_path` and `report_download_selector`
// are defined outside this snippet.

// Open a CDP session through the public API instead of reaching into the
// underscore-prefixed `page._client`.
const cdpSession = await page.target().createCDPSession();
await cdpSession.send('Page.setDownloadBehavior', {
  behavior: 'allow',
  downloadPath: download_path,
});

await frame.focus(report_download_selector);

// Start waiting before pressing Enter so the response cannot slip past the
// listener. NOTE(review): this waits for the response to be received; whether
// the file is fully written to disk at that point should be confirmed.
await Promise.all([
  page.waitForResponse((response) => response.url().endsWith('/data')),
  page.keyboard.press('Enter'),
]);
I spent hours poring through this thread and Stack Overflow yesterday, trying to figure out how to get Puppeteer to download a csv file by clicking a download link in headless mode in an authenticated session. The accepted answer here didn't work in my case because the download does not trigger `targetcreated`, and the next answer, for whatever reason, did not retain the authenticated session. This article saved the day. In short: use `fetch`. Hopefully this helps someone else out.
// Fetch the file from inside the page context; `credentials: 'include'` asks
// the browser to send the page's cookies with the request.
// Resolves to the response body as text.
const res = await this.page.evaluate(async () => {
  const response = await fetch('https://example.com/path/to/file.csv', {
    method: 'GET',
    credentials: 'include',
  });
  // Without this check an HTTP error page (401, 404, ...) would be returned
  // as if it were the file's contents.
  if (!response.ok) {
    throw new Error(`Download failed: ${response.status} ${response.statusText}`);
  }
  return response.text();
});
I have another solution to this problem, since none of the answers here worked for me.
I needed to log into a website, and download some .csv reports. Headed was fine, headless failed no matter what I tried. Looking at the Network errors, the download is aborted, but I couldn't (quickly) determine why.
So, I intercepted the requests and used node-fetch to make the request outside of puppeteer. This required copying the fetch options, body, headers and adding in the access cookie.
Good luck.
`setDownloadBehavior` works fine in `headless: true` mode, and the file is eventually downloaded, but an exception is thrown when it finishes. In my case a simple wrapper lets me forget about this issue and just gets the job done:
const fs = require('fs');
/**
 * Download helper bound to one page: makes sure the target directory exists,
 * enables downloads into it through a CDP session, and exposes `download(url)`.
 *
 * @param {object} page - Object providing `target().createCDPSession()` and
 *   `goto(url)` (a Puppeteer Page in the surrounding text).
 * @param {string} downloaddPath - Directory the browser should save files to.
 *   Created, including missing parent directories, if it does not exist.
 */
function DownloadMgr(page, downloaddPath) {
  // `recursive: true` creates missing parents and is a no-op for an existing
  // directory, which also removes the exists-then-create race.
  fs.mkdirSync(downloaddPath, { recursive: true });

  // Started eagerly; every download() call waits for it to settle first.
  const init = page
    .target()
    .createCDPSession()
    .then((client) =>
      client.send('Page.setDownloadBehavior', {
        behavior: 'allow',
        downloadPath: downloaddPath,
      })
    );

  /**
   * Navigate to `url` so that the browser downloads it.
   *
   * @param {string} url - Address of the file to download.
   * @returns {Promise<undefined>} Resolves after the navigation attempt.
   * @throws Rethrows any navigation error other than the aborted-navigation
   *   error, and any failure of the download-behaviour setup.
   */
  this.download = async function (url) {
    await init;
    try {
      await page.goto(url);
    } catch (err) {
      // A navigation that turns into a download is reported as aborted; that
      // is the one failure this wrapper exists to ignore. Anything else
      // (unknown host, timeout, ...) is a real error and must reach the caller.
      const message = String(err && err.message);
      if (!message.includes('ERR_ABORTED')) {
        throw err;
      }
    }
  };
}
// Example usage of DownloadMgr: build a manager that saves into ./tmp and
// download one file with it.
var path = require('path');
// NOTE(review): this assumes ./classes/DownloadMgr exports the constructor — confirm.
var DownloadMgr = require('./classes/DownloadMgr');
// `page` is not defined in this snippet; presumably an open Puppeteer page — confirm.
// path.resolve turns './tmp' into an absolute path before it is handed over.
var downloadMgr = new DownloadMgr(page, path.resolve('./tmp'));
// Waits for the promise returned by download() to settle.
await downloadMgr.download('http://file.csv');
The problem is that the browser closes before the download has finished.
You can get the file size and the file name from the response, and then use a watch script to compare the size of the downloaded file against it, so that you know when it is safe to close the browser.
Here is an example:
const filename = <set this with some regex in response>;
const dir = <watch folder or file>;
// Download and wait for download
await Promise.all([
page.click('#DownloadFile'),
// Event on all responses
page.on('response', response => {
// If response has a file on it
if (response._headers['content-disposition'] === `attachment;filename=${filename}`) {
// Get the size
console.log('Size del header: ', response._headers['content-length']);
// Watch event on download folder or file
fs.watchFile(dir, function (curr, prev) {
// If current size eq to size from response then close
if (parseInt(curr.size) === parseInt(response._headers['content-length'])) {
browser.close();
this.close();
}
});
}
})
]);
The way the response is matched could certainly be improved, but I hope you'll find this useful.