I have some JavaScript code that communicates with an XML-RPC backend. The XML-RPC backend returns strings of the form:
You're welcome...just a messenger...full credit goes to ourcodeworld.com, link below.
window.htmlentities = {
    /**
     * Encodes every character of a string as a decimal numeric character
     * reference (for example "A" becomes "&#65;").
     *
     * The string is walked by Unicode code point, not by UTF-16 code unit, so a
     * character outside the Basic Multilingual Plane yields one valid reference
     * instead of two surrogate references.
     *
     * @param {String} str String with unescaped characters
     * @returns {String} The string with every character replaced by "&#N;"
     */
    encode : function(str) {
        var buf = [];
        var i = 0;
        while (i < str.length) {
            var codePoint = str.codePointAt(i);
            buf.push('&#' + codePoint + ';');
            // Astral code points occupy two UTF-16 code units.
            i += codePoint > 0xFFFF ? 2 : 1;
        }
        return buf.join('');
    },
    /**
     * Replaces numeric character references (decimal "&#N;" and hexadecimal
     * "&#xN;") with the characters they denote. Named references such as
     * "&amp;" are left untouched.
     *
     * @param {String} str String containing numeric character references
     * @returns {String} The decoded string
     */
    decode : function(str) {
        return str.replace(/&#(?:[xX]([0-9a-fA-F]+)|(\d+));/g, function(match, hex, dec) {
            var codePoint = hex ? parseInt(hex, 16) : parseInt(dec, 10);
            // Values beyond the Unicode range cannot be represented; keep the
            // original text instead of letting String.fromCodePoint throw.
            if (codePoint > 0x10FFFF) {
                return match;
            }
            return String.fromCodePoint(codePoint);
        });
    }
};
Full Credit: https://ourcodeworld.com/articles/read/188/encode-and-decode-html-entities-using-pure-javascript
I use this in my project. It is inspired by other answers but adds an extra "safe escape" parameter, which can be useful when you deal with decorated characters.
/**
 * Decodes HTML character references in a string by letting a scratch <div>
 * parse it.
 *
 * Every "<" is escaped to "&lt;" before the string is assigned to innerHTML,
 * so the input is never parsed as markup: tags in the input come back as
 * literal text and no element is created from them.
 *
 * @param {String} str String that may contain HTML entities
 * @param {Boolean} [safeEscape] When truthy, "<" characters in the decoded
 *        result are escaped to "&lt;" again
 * @returns {*} The decoded string; values that are empty or not strings are
 *          returned unchanged
 */
var decodeEntities=(function(){
    // Scratch element, created on first use and reused by later calls.
    var el;
    return function(str, safeEscape){
        if(!str || typeof str !== 'string'){
            return str;
        }
        if(!el){
            el=document.createElement('div');
        }
        // Neutralise tag openers so innerHTML only resolves entities.
        var escaped=str.replace(/</g, '&lt;');
        el.innerHTML=escaped;
        // innerText first, textContent as the fallback where innerText is missing.
        var decoded=el.innerText || el.textContent || escaped;
        // Do not keep the previous input alive inside the shared element.
        el.innerHTML='';
        if(safeEscape){
            decoded=decoded.replace(/</g, '&lt;');
        }
        return decoded;
    };
})();
And it's usable like:
// Build a <div> whose title attribute carries the plain decode of `label` and
// whose body carries the decode made with the second argument set to true.
// NOTE(review): `label` contains no HTML entities as written; presumably the
// accented character was originally spelled as an entity — confirm.
var label='safe <b> character éntity</b>';
var safehtml=[
    '<div title="', decodeEntities(label), '">',
    decodeEntities(label, true),
    '</div>'
].join('');
If you're using jQuery:
/**
 * Decodes HTML entities in a string with jQuery and returns the text content.
 * Any markup in the input is parsed and only its text is kept.
 *
 * The scratch <div> is created in a separate document obtained from
 * createHTMLDocument rather than in the page's document, so that parsing
 * untrusted input does not load resources or fire inline event handlers
 * (for example onerror on an <img>).
 *
 * @param {String} value HTML-encoded string
 * @returns {String} The decoded text
 */
function htmlDecode(value){
    var inertDocument = document.implementation.createHTMLDocument('');
    return $(inertDocument.createElement('div')).html(value).text();
}
Otherwise, use Strictly Software's Encoder Object, which has an excellent htmlDecode()
function.
/**
 * Decodes HTML entities in a string and returns the resulting text.
 * Any markup in the input is parsed and only its text is kept.
 *
 * The scratch <span> is created in a separate document obtained from
 * createHTMLDocument rather than in the page's document, so that parsing
 * untrusted input does not load resources or fire inline event handlers.
 *
 * @param {String} htmlText HTML-encoded string
 * @returns {String} The decoded text
 */
function decodeHTMLContent(htmlText) {
    var inertDocument = document.implementation.createHTMLDocument("");
    var txt = inertDocument.createElement("span");
    txt.innerHTML = htmlText;
    // The element is never rendered, so textContent is the text to return.
    return txt.textContent;
}
// Decode a sample string and print the result.
// NOTE(review): the sample contains no character references as written;
// presumably it originally read 'One &amp; two &amp; three' — confirm.
var result = decodeHTMLContent('One & two & three');
console.log(result);
Most answers given here have a huge disadvantage: if the string you are trying to convert isn't trusted then you will end up with a Cross-Site Scripting (XSS) vulnerability. For the function in the accepted answer, consider the following:
// Attack input: a raw <img> tag rather than escaped text. A decoder that assigns
// this to innerHTML of an element in the page's document lets the onerror handler run.
htmlDecode("<img src='dummy' onerror='alert(/xss/)'>");
The string here contains an unescaped HTML tag, so instead of decoding anything the htmlDecode
function will actually run JavaScript code specified inside the string.
This can be avoided by using DOMParser which is supported in all modern browsers:
/**
 * Decodes HTML entities by parsing the input as a "text/html" document with
 * DOMParser and returning the text content of its root element.
 *
 * @param {String} input HTML-encoded string
 * @returns {String} Text content of the parsed document
 */
function htmlDecode(input) {
    const parser = new DOMParser();
    const parsed = parser.parseFromString(input, "text/html");
    const root = parsed.documentElement;
    return root.textContent;
}
// Escaped markup is decoded back into literal text.
console.log( htmlDecode("&lt;img src='myimage.jpg'&gt;") )
// "<img src='myimage.jpg'>"
// Raw markup is parsed as an element; it has no text content, so nothing is returned.
console.log( htmlDecode("<img src='dummy' onerror='alert(/xss/)'>") )
// ""
This function is guaranteed to not run any JavaScript code as a side-effect. Any HTML tags will be ignored, only text content will be returned.
Compatibility note: Parsing HTML with DOMParser
requires at least Chrome 30, Firefox 12, Opera 17, Internet Explorer 10, Safari 7.1 or Microsoft Edge. So all browsers without support are way past their EOL and as of 2017 the only ones that can still be seen in the wild occasionally are older Internet Explorer and Safari versions (usually these still aren't numerous enough to bother).
See the benchmark: https://jsperf.com/decode-html12345678/1
// Example call. It can precede the definition because function declarations are hoisted.
// NOTE(review): the sample contains no character references as written;
// presumably it originally read 'test: &gt;' — confirm.
console.log(decodeEntities('test: >'));
/**
 * Removes <script> blocks and HTML tags from a string, then decodes the
 * remaining character references by assigning the text to a <textarea> and
 * reading back its value.
 *
 * @param {String} str String that may contain tags and HTML entities
 * @returns {String} The stripped and decoded text
 */
function decodeEntities(str) {
    // Cache the element on the function object so it is created only once;
    // without storing it here, every call would build a new textarea.
    if (!decodeEntities.element) {
        decodeEntities.element = document.createElement('textarea');
    }
    const el = decodeEntities.element;
    // strip script/html tags
    el.innerHTML = str
        .replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
        .replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '');
    return el.value;
}
If you need to leave tags, then remove the two .replace(...)
calls (you can leave the first one if you do not need scripts).