I want to create a scraper that:
In puppeter you have access to the session cookies through page.cookies()
.
So once you log in, you could get every cookie and save it in a json file:
const fs = require(fs);
const cookiesFilePath = 'cookies.json';
// Save Session Cookies
const cookiesObject = await page.cookies()
// Write cookies to temp file to be used in other profile pages
fs.writeFile(cookiesFilePath, JSON.stringify(cookiesObject),
function(err) {
if (err) {
console.log('The file could not be written.', err)
}
console.log('Session has been successfully saved')
})
Then, on your next iteration right before using page.goto()
you can call page.setCookie()
to load the cookies from the file one by one:
const previousSession = fs.existsSync(cookiesFilePath)
if (previousSession) {
// If file exist load the cookies
const cookiesString = fs.readFileSync(cookiesFilePath);
const parsedCookies = JSON.parse(cookiesString);
if (parsedCookies.length !== 0) {
for (let cookie of parsedCookies) {
await page.setCookie(cookie)
}
console.log('Session has been loaded in the browser')
}
}
Checkout the docs:
There is an option to save user data using the userDataDir
option when launching puppeteer. This stores the session and other things related to launching chrome.
puppeteer.launch({
userDataDir: "./user_data"
});
It doesn't go into great detail but here's a link to the docs for it: https://pptr.dev/#?product=Puppeteer&version=v1.6.1&show=api-puppeteerlaunchoptions
For a version of the above solution that actually works and doesn't rely on jsonfile
(instead using the more standard fs
) check this out:
Setup:
const fs = require('fs');
const cookiesPath = "cookies.txt";
Reading the cookies (put this code first):
// If the cookies file exists, read the cookies.
const previousSession = fs.existsSync(cookiesPath)
if (previousSession) {
const content = fs.readFileSync(cookiesPath);
const cookiesArr = JSON.parse(content);
if (cookiesArr.length !== 0) {
for (let cookie of cookiesArr) {
await page.setCookie(cookie)
}
console.log('Session has been loaded in the browser')
}
}
Writing the cookies:
// Write Cookies
const cookiesObject = await page.cookies()
fs.writeFileSync(cookiesPath, JSON.stringify(cookiesObject));
console.log('Session has been saved to ' + cookiesPath);
For writing Cookies
async function writingCookies() {
const cookieArray = require(C.cookieFile); //C.cookieFile can be replaced by ('./filename.json')
await page.setCookie(...cookieArray);
await page.cookies(C.feedUrl); //C.url can be ('https://example.com')
}
For reading Cookies, for this, you've to install jsonfile in your project : npm install jsonfile
async function getCookies() {
const cookiesObject = await page.cookies();
jsonfile.writeFile('linkedinCookies.json', cookiesObject, { spaces: 2 },
function (err) {
if (err) {
console.log('The Cookie file could not be written.', err);
}
console.log("Cookie file has been successfully saved in current working Directory : '" + process.cwd() + "'");
})
}
Call these two functions using await
and it will work for you.