问题
I need to get the generated HTML source of a page after all JavaScript DOM manipulation has finished. I was using Phantomas (https://github.com/macbre/phantomas)
for this purpose, but unfortunately it does not provide a way to pass in request headers.
Is there a library that will allow me to pass request headers and then get the generated HTML source code?
Any pointers would be greatly appreciated.
回答1:
You can use PhantomJS, a scriptable headless WebKit browser.
Set customHeaders on the page object and read page.content once the page has loaded:
// Load a page with custom request headers and print its generated HTML.
var webPage = require('webpage');
var page = webPage.create();

// Extra HTTP headers to send along with the page request.
page.customHeaders = {
  "X-Test": "foo",
  "DNT": "1"
};

page.open('http://phantomjs.org', function (status) {
  // Bail out with a non-zero exit code when the load fails; otherwise
  // page.content would be printed as if the request had succeeded.
  if (status !== 'success') {
    console.log('Failed to load page: ' + status);
    phantom.exit(1);
    return;
  }

  // page.content holds the page's HTML at the time the callback runs.
  var content = page.content;
  console.log('Content: ' + content);
  phantom.exit();
});
Save it to test.js and run:
phantomjs test.js
回答2:
You can use casperjs.
Pass the headers in a settings object to the open() function, then use getPageContent() to get the HTML source of the page:
// Open a page with custom request headers through CasperJS and print its HTML.
var casper = require('casper').create();

// Settings handed to open(): the HTTP method plus the request headers to send.
var requestSettings = {
  method: 'get',
  headers: {
    'Accept-Language': 'en-US,en;q=0.8',
    'HEADER-XYZ': 'HEADER-XYZ-DATA'
  }
};

// Step 1: request the page using the settings above.
function openWithHeaders() {
  this.open("http://casperjs.org", requestSettings);
}

// Step 2: write the page's HTML source to the console.
function printPageContent() {
  console.log(this.getPageContent());
}

casper.start();
casper.then(openWithHeaders);
casper.then(printPageContent);

// Run the queued steps, then exit.
casper.run(function () {
  this.exit();
});
来源:https://stackoverflow.com/questions/27596852/get-generated-html-after-js-manipulates-the-dom-and-pass-request-headers