Is there a way in JS to get the entire HTML within the html tags, as a string?
document.documentElement.??
To also get things outside the ...
, most importantly the declaration, you could walk through document.childNodes, turning each into a string:
const html = [...document.childNodes]
.map(node => nodeToString(node))
.join('\n') // could use '' instead, but whitespace should not matter.
function nodeToString(node) {
switch (node.nodeType) {
case node.ELEMENT_NODE:
return node.outerHTML
case node.TEXT_NODE:
// Text nodes should probably never be encountered, but handling them anyway.
return node.textContent
case node.COMMENT_NODE:
return ``
case node.DOCUMENT_TYPE_NODE:
return doctypeToString(node)
default:
throw new TypeError(`Unexpected node type: ${node.nodeType}`)
}
}
I published this code as document-outerhtml on npm.
edit Note the code above depends on a function doctypeToString
; its implementation could be as follows (code below is published on npm as doctype-to-string):
function doctypeToString(doctype) {
if (doctype === null) {
return ''
}
// Checking with instanceof DocumentType might be neater, but how to get a
// reference to DocumentType without assuming it to be available globally?
// To play nice with custom DOM implementations, we resort to duck-typing.
if (!doctype
|| doctype.nodeType !== doctype.DOCUMENT_TYPE_NODE
|| typeof doctype.name !== 'string'
|| typeof doctype.publicId !== 'string'
|| typeof doctype.systemId !== 'string'
) {
throw new TypeError('Expected a DocumentType')
}
const doctypeString = ``
return doctypeString
}