I have a textarea that a user types something into, and they are allowed to type HTML. Once they are done typing, the
change
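Believe it or not, you can (safely) do this with the browser's built-in HTML parser. Simply create a new div with `document.createElement`, toss the contents of the textarea into the div using `innerHTML`, and presto, you've got a full-blown DOM to work with. And no, `<script>` elements contained within this div will not be evaluated. (Inline event-handler attributes such as `onerror` on an `<img>` are a separate matter: they can still fire, which is one reason the attribute cleanup mentioned below is still needed.)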
Here's a simple example that strips from an element all tags that do not appear in an `ALLOWED_TAGS` list.
// Whitelist of element names that sanitize() leaves in place. Entries are
// upper-case because they are compared directly against each node's nodeName.
var ALLOWED_TAGS = [
  "STRONG",
  "EM",
  "BLOCKQUOTE",
  "Q",
  "DEL",
  "INS",
  "A"
];
/**
 * Strip from `el` every descendant element whose nodeName is not listed in
 * ALLOWED_TAGS. A disallowed element is not deleted together with its
 * content: it is handed to usurp(), which replaces it with its own children.
 * `el` itself is never removed, and attributes are left untouched.
 *
 * @param {Element} el - container whose descendants are filtered in place.
 */
function sanitize(el) {
  // Copy the collection into a plain array before touching the tree, so the
  // list being walked cannot shift while usurp() restructures the DOM.
  var descendants = Array.prototype.slice.call(el.getElementsByTagName("*"), 0);

  descendants.forEach(function (node) {
    var isAllowed = ALLOWED_TAGS.indexOf(node.nodeName) !== -1;
    if (!isAllowed) {
      usurp(node);
    }
  });
}
/**
 * Replace element `p` with its own children: every child of `p` is moved,
 * in its original order, into `p`'s parent at the position `p` occupied,
 * and `p` itself is then removed from that parent.
 *
 * If `p` has no parent there is nowhere to move the children to, so the
 * call is a no-op and `p` is left exactly as it was (previously this case
 * detached one child and then threw on the null parent).
 *
 * @param {Node} p - the node to dissolve into its parent.
 */
function usurp(p) {
  var parent = p.parentNode;
  if (!parent) {
    return;
  }

  // insertBefore() relocates a node that is already attached elsewhere, so
  // each pass takes the current first child out of `p` and drops it just in
  // front of `p`; repeating front-to-back preserves the children's order.
  while (p.firstChild) {
    parent.insertBefore(p.firstChild, p);
  }

  parent.removeChild(p);
}
As mentioned, you'll have to create an empty div container to use this. Here's one example application of the technique, a function to sanitize strings. Please note, however, that "sanitize" is at this time a misnomer--it will take a lot more work (cleaning attribute strings and such) before this "sanitizer" will output HTML that is truly safe.
/**
 * Parse `string` as HTML, run sanitize() over the resulting tree, and return
 * the filtered markup.
 *
 * The markup is parsed inside a separate document created with
 * document.implementation.createHTMLDocument() rather than in a div owned by
 * the live page, so that parsing untrusted input does not trigger resource
 * loads or inline event handlers (e.g. `<img onerror=...>`) in the page.
 *
 * NOTE: only tag names are filtered. Attributes on the surviving elements
 * are returned unchanged, so the result is not yet safe to insert into a
 * page when `string` comes from an untrusted source.
 *
 * @param {string} string - HTML source to filter.
 * @returns {string} the markup with non-whitelisted tags unwrapped.
 */
function sanitizeString(string) {
  // The title argument is passed explicitly; some older browsers require it.
  var scratchDocument = document.implementation.createHTMLDocument("");
  var div = scratchDocument.createElement("div");
  div.innerHTML = string;
  sanitize(div);
  return div.innerHTML;
}