I finally figured out how to use Node.js and installed all the libraries/extensions. Puppeteer is working, but, as before with XMLHttpRequest, it gets only the template/body of the page.
If you want the full HTML, the same as you see in the inspector, here it is:
const puppeteer = require('puppeteer');

/**
 * Opens a page in a Puppeteer-controlled browser, waits for it to settle,
 * and prints the serialized markup of the live DOM (what the inspector
 * shows), not the raw HTTP response body.
 *
 * Any failure is reported on stderr; the browser is closed in every case.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();

    try {
      // Take the first tab the browser already has instead of opening a new one.
      const [page] = await browser.pages();

      // NOTE: the target URL is empty in this copy of the script; put the
      // address of the page to scrape here before running.
      await page.goto('', { waitUntil: 'networkidle0' });

      // '*' matches the first element in document order, i.e. the root
      // element, so this is the outer HTML of the whole rendered document.
      const data = await page.evaluate(() => document.querySelector('*').outerHTML);

      console.log(data);
    } finally {
      // Close the browser even when navigation or evaluation throws, so no
      // browser process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
// Serialize the document's root element, as it currently exists in the page,
// and hand the resulting markup string back to the Node.js side.
let bodyHTML = await page.evaluate(() => {
  const rootElement = document.documentElement;
  return rootElement.outerHTML;
});
Some notes:
You do not need cheerio with puppeteer, and you do not need to reparse page.content(): you already have the full DOM with all scripts run, and you can evaluate any code in the window context, just as in a browser, using page.evaluate() and transferring serializable data between the web API context and the Node.js API context.
Try to use async/await only; this will simplify your code and flow.
If you need to wait until all the scripts and other dependencies are loaded, use waitUntil: 'networkidle0' in page.goto().
If you suspect that the document's scripts need some time to reach the needed state, use one of the test functions such as page.waitForSelector(), or fall back to page.waitFor(milliseconds).
Here is a simple script that outputs all tag names in a page.
'use strict';

const puppeteer = require('puppeteer');

/**
 * Opens a page in a Puppeteer-controlled browser and prints the tag name of
 * every element in the rendered document, in document order.
 *
 * Any failure is reported on stderr; the browser is closed in every case.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();

    try {
      // Take the first tab the browser already has instead of opening a new one.
      const [page] = await browser.pages();

      // NOTE: the target URL is empty in this copy of the script; put the
      // address of the page to scrape here before running.
      await page.goto('', { waitUntil: 'networkidle0' });

      // The callback runs inside the page; only its serializable result
      // (an array of strings) is transferred back to Node.js.
      const data = await page.evaluate(
        () => Array.from(document.querySelectorAll('*'))
          .map(elem => elem.tagName)
      );

      console.log(data);
    } finally {
      // Close the browser even when navigation or evaluation throws, so no
      // browser process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
You can specify your task in more detail, and we can try to write something more appropriate.
Script for the site in question (task from a comment below):
'use strict';

const fs = require('fs');
const puppeteer = require('puppeteer');

/**
 * Loads a search-results page, keeps clicking its "load more" button until
 * the button is gone, then saves three files in the working directory:
 *
 *   full.html     - the whole page as returned by page.content()
 *   articles.html - outer HTML of the results container
 *   articles.txt  - inner text of every result article, separated by rules
 *
 * Any failure is reported on stderr; the browser is closed in every case.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();

    try {
      // Take the first tab the browser already has instead of opening a new one.
      const [page] = await browser.pages();

      // NOTE: the scheme, host and leading query parameters of this URL are
      // missing from this copy of the script; restore them before running.
      await page.goto('&center=%7B%22lat%22%3A50.16447196305031%2C%22lng%22%3A14.387521875272125%7D&zoom=11&locationInput=praha&limit=15');

      await page.waitForSelector('#search-content button.btn-icon');

      // Each pass clicks the button once and waits for the article count to
      // grow; the loop ends when the button is no longer in the DOM.
      while (await page.$('#search-content button.btn-icon') !== null) {
        const articlesForNow = (await page.$$('#search-content article')).length;
        console.log(`Articles for now: ${articlesForNow}. Getting more...`);

        // Start the click and the wait together so the wait is already
        // polling when the new articles are inserted.
        await Promise.all([
          page.evaluate(
            () => { document.querySelector('#search-content button.btn-icon').click(); }
          ),
          page.waitForFunction(
            old => document.querySelectorAll('#search-content article').length > old,
            {},
            articlesForNow
          ),
        ]);
      }

      const articlesAll = (await page.$$('#search-content article')).length;
      console.log(`All articles: ${articlesAll}.`);

      fs.writeFileSync('full.html', await page.content());

      fs.writeFileSync('articles.html', await page.evaluate(
        () => document.querySelector('#search-content div.b-filter__inner').outerHTML
      ));

      // writeFileSync needs a string or Buffer, so the per-article texts are
      // joined into one string inside the page before being sent to Node.js.
      fs.writeFileSync('articles.txt', await page.evaluate(
        () => [...document.querySelectorAll('#search-content article')]
          .map(({ innerText }) => innerText)
          .join(`\n${'-'.repeat(50)}\n`)
      ));
    } finally {
      // Close the browser even when a step above throws, so no browser
      // process is left running.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();