I need a reliable Javascript library / function to check if a HTML snippet is valid that I can call from my code. For example, it should check that opened tags and quotation
Update: this answer is limited - please see the edit below.
Expanding on @kolink's answer, I use:
var checkHTML = function(html) {
var doc = document.createElement('div');
doc.innerHTML = html;
return ( doc.innerHTML === html );
}
I.e., we create a temporary div with the HTML. In order to do this, the browser will create a DOM tree based on the HTML string, which may involve closing tags etc.
Comparing the div's HTML contents with the original HTML will tell us if the browser needed to change anything.
checkHTML('<a>hell<b>o</b>')
Returns false.
checkHTML('<a>hell<b>o</b></a>')
Returns true.
Edit: As @Quentin notes below, this is excessively strict for a variety of reasons: browsers will often fix omitted closing tags, even if closing tags are optional for that tag. Eg:
<p>one para
<p>second para
...is considered valid (since Ps are allowed to omit closing tags) but checkHTML
will return false. Browsers will also normalise tag cases, and alter white space. You should be aware of these limits when deciding to use this approach.
function isHTML(str)
{
var a = document.createElement('div');
a.innerHTML = str;
for(var c= a.ChildNodes, i = c.length; i--)
{
if (c[i].nodeType == 1) return true;
}
return false;
}
Good Luck!
function validHTML(html) {
var openingTags, closingTags;
html = html.replace(/<[^>]*\/\s?>/g, ''); // Remove all self closing tags
html = html.replace(/<(br|hr|img).*?>/g, ''); // Remove all <br>, <hr>, and <img> tags
openingTags = html.match(/<[^\/].*?>/g) || []; // Get remaining opening tags
closingTags = html.match(/<\/.+?>/g) || []; // Get remaining closing tags
return openingTags.length === closingTags.length ? true : false;
}
var htmlContent = "<p>your html content goes here</p>" // Note: String without any html tag will consider as valid html snippet. If it’s not valid in your case, in that case you can check opening tag count first.
if(validHTML(htmlContent)) {
alert('Valid HTML')
}
else {
alert('Invalid HTML');
}
Well, this code:
function tidy(html) {
var d = document.createElement('div');
d.innerHTML = html;
return d.innerHTML;
}
This will "correct" malformed HTML to the best of the browser's ability. If that's helpful to you, it's a lot easier than trying to validate HTML.
None of the solutions presented so far is doing a good job in answering the original question, especially when it comes to
I don't want the validation to fail because something is not 100% standard (but would work anyways).
tldr >> check the JSFiddle
So I used the input of the answers and comments on this topic and created a method that does the following:
<br/>
and empty attribute normalizations =""
are not ignoredReturns
normalized means, that on rendering, the browser ignores or repairs sometimes specific parts of the input (like adding missing closing-tags for <p>
and converts others (like single to double quotes or encoding of ampersands).
Making a distinction between "failed" and "normalized" allows to flag the content to the user as "this will not be rendered as you might expect it".
Most times normalized gives back an only slightly altered version of the original html string - still, sometimes the result is quite different. So this should be used e.g. to flag user-input for further review before saving it to a db or rendering it blindly. (see JSFiddle for examples of normalization)
The checks take the following exceptions into consideration
image
and other tags with a src
attribute are 'disarmed' during rendering<br/>
>> <br>
conversion<p disabled>
>> <p disabled="">
).innerHTML
, e.g. in attribute values.
function simpleValidateHtmlStr(htmlStr, strictBoolean) {
if (typeof htmlStr !== "string")
return false;
var validateHtmlTag = new RegExp("<[a-z]+(\s+|\"[^\"]*\"\s?|'[^']*'\s?|[^'\">])*>", "igm"),
sdom = document.createElement('div'),
noSrcNoAmpHtmlStr = htmlStr
.replace(/ src=/, " svhs___src=") // disarm src attributes
.replace(/&/igm, "#svhs#amp##"), // 'save' encoded ampersands
noSrcNoAmpIgnoreScriptContentHtmlStr = noSrcNoAmpHtmlStr
.replace(/\n\r?/igm, "#svhs#nl##") // temporarily remove line breaks
.replace(/(<script[^>]*>)(.*?)(<\/script>)/igm, "$1$3") // ignore script contents
.replace(/#svhs#nl##/igm, "\n\r"), // re-add line breaks
htmlTags = noSrcNoAmpIgnoreScriptContentHtmlStr.match(/<[a-z]+[^>]*>/igm), // get all start-tags
htmlTagsCount = htmlTags ? htmlTags.length : 0,
tagsAreValid, resHtmlStr;
if(!strictBoolean){
// ignore <br/> conversions
noSrcNoAmpHtmlStr = noSrcNoAmpHtmlStr.replace(/<br\s*\/>/, "<br>")
}
if (htmlTagsCount) {
tagsAreValid = htmlTags.reduce(function(isValid, tagStr) {
return isValid && tagStr.match(validateHtmlTag);
}, true);
if (!tagsAreValid) {
return false;
}
}
try {
sdom.innerHTML = noSrcNoAmpHtmlStr;
} catch (err) {
return false;
}
// compare rendered tag-count with expected tag-count
if (sdom.querySelectorAll("*").length !== htmlTagsCount) {
return false;
}
resHtmlStr = sdom.innerHTML.replace(/&/igm, "&"); // undo '&' encoding
if(!strictBoolean){
// ignore empty attribute normalizations
resHtmlStr = resHtmlStr.replace(/=""/, "")
}
// compare html strings while ignoring case, quote-changes, trailing spaces
var
simpleIn = noSrcNoAmpHtmlStr.replace(/["']/igm, "").replace(/\s+/igm, " ").toLowerCase().trim(),
simpleOut = resHtmlStr.replace(/["']/igm, "").replace(/\s+/igm, " ").toLowerCase().trim();
if (simpleIn === simpleOut)
return true;
return resHtmlStr.replace(/ svhs___src=/igm, " src=").replace(/#svhs#amp##/, "&");
}
Here you can find it in a JSFiddle https://jsfiddle.net/abernh/twgj8bev/ , together with different test-cases, including
"<a href='blue.html id='green'>missing attribute quotes</a>" // FAIL
"<a>hell<B>o</B></a>" // PASS
'<a href="test.html">hell<b>o</b></a>' // PASS
'<a href=test.html>hell<b>o</b></a>', // PASS
"<a href='test.html'>hell<b>o</b></a>", // PASS
'<ul><li>hell</li><li>hell</li></ul>', // PASS
'<ul><li>hell<li>hell</ul>', // PASS
'<div ng-if="true && valid">ampersands in attributes</div>' // PASS
.
Using pure JavaScript you may check if an element exists using the following function:
if (typeof(element) != 'undefined' && element != null)
Using the following code we can test this in action:
HTML:
<input type="button" value="Toggle .not-undefined" onclick="toggleNotUndefined()">
<input type="button" value="Check if .not-undefined exists" onclick="checkNotUndefined()">
<p class=".not-undefined"></p>
CSS:
p:after {
content: "Is 'undefined'";
color: blue;
}
p.not-undefined:after {
content: "Is not 'undefined'";
color: red;
}
JavaScript:
function checkNotUndefined(){
var phrase = "not ";
var element = document.querySelector('.not-undefined');
if (typeof(element) != 'undefined' && element != null) phrase = "";
alert("Element of class '.not-undefined' does "+phrase+"exist!");
// $(".thisClass").length checks to see if our elem exists in jQuery
}
function toggleNotUndefined(){
document.querySelector('p').classList.toggle('not-undefined');
}
It can be found on JSFiddle.