I’m using JavaScript to pull a value out from a hidden field and display it in a textbox. The value in the hidden field is encoded.
For example,
Prototype has it built-in the String class. So if you are using/plan to use Prototype, it does something like:
'<div class="article">This is an article</div>'.escapeHTML();
// -> "<div class="article">This is an article</div>"
Based on angular's sanitize... (es6 module syntax)
// ref: https://github.com/angular/angular.js/blob/v1.3.14/src/ngSanitize/sanitize.js
const SURROGATE_PAIR_REGEXP = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
const NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g;
const decodeElem = document.createElement('pre');
/**
* Decodes html encoded text, so that the actual string may
* be used.
* @param value
* @returns {string} decoded text
*/
export function decode(value) {
if (!value) return '';
decodeElem.innerHTML = value.replace(/</g, '<');
return decodeElem.textContent;
}
/**
* Encodes all potentially dangerous characters, so that the
* resulting string can be safely inserted into attribute or
* element text.
* @param value
* @returns {string} encoded text
*/
export function encode(value) {
if (value === null || value === undefined) return '';
return String(value).
replace(/&/g, '&').
replace(SURROGATE_PAIR_REGEXP, value => {
var hi = value.charCodeAt(0);
var low = value.charCodeAt(1);
return '&#' + (((hi - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000) + ';';
}).
replace(NON_ALPHANUMERIC_REGEXP, value => {
return '&#' + value.charCodeAt(0) + ';';
}).
replace(/</g, '<').
replace(/>/g, '>');
}
export default {encode,decode};
HtmlEncodes the given value
var htmlEncodeContainer = $('<div />');
function htmlEncode(value) {
if (value) {
return htmlEncodeContainer.text(value).html();
} else {
return '';
}
}
As far as I know there isn't any straight forward HTML Encode/Decode method in javascript.
However, what you can do, is to use JS to create an arbitrary element, set its inner text, then read it using innerHTML.
Let's say, with jQuery, this should work:
var helper = $('chalk & cheese').hide().appendTo('body');
var htmled = helper.html();
helper.remove();
Or something along these lines.
EDIT: This answer was posted a long ago, and the htmlDecode
function introduced a XSS vulnerability. It has been modified changing the temporary element from a div
to a textarea
reducing the XSS chance. But nowadays, I would encourage you to use the DOMParser API as suggested in other anwswer.
I use these functions:
function htmlEncode(value){
// Create a in-memory element, set its inner text (which is automatically encoded)
// Then grab the encoded contents back out. The element never exists on the DOM.
return $('<textarea/>').text(value).html();
}
function htmlDecode(value){
return $('<textarea/>').html(value).text();
}
Basically a textarea element is created in memory, but it is never appended to the document.
On the htmlEncode
function I set the innerText
of the element, and retrieve the encoded innerHTML
; on the htmlDecode
function I set the innerHTML
value of the element and the innerText
is retrieved.
Check a running example here.
I know this is an old one, but I wanted to post a variation of the accepted answer that will work in IE without removing lines:
function multiLineHtmlEncode(value) {
var lines = value.split(/\r\n|\r|\n/);
for (var i = 0; i < lines.length; i++) {
lines[i] = htmlEncode(lines[i]);
}
return lines.join('\r\n');
}
function htmlEncode(value) {
return $('<div/>').text(value).html();
}