Is there a way in JavaScript to check if a string is a URL?
RegExes are excluded because the URL is most likely written like stackoverflow
; that is to s
Mathias Bynens has compiled a list of well-known URL regexes with test URLs. There is little reason to write a new regular expression; just pick an existing one that suits you best.
But the comparison table for those regexes also shows that it is next to impossible to do URL validation with a single regular expression. All of the regexes in Bynens' list produce false positives and false negatives.
I suggest that you use an existing URL parser (for example new URL('http://www.example.com/')
in JavaScript) and then apply the checks you want to perform against the parsed and normalized form of the URL resp. its components. Using the JavaScript URL
interface has the additional benefit that it will only accept such URLs that are really accepted by the browser.
You should also keep in mind that technically incorrect URLs may still work. For example http://w_w_w.example.com/
, http://www..example.com/
, http://123.example.com/
all have an invalid hostname part but every browser I know will try to open them without complaints, and when you specify IP addresses for those invalid names in /etc/hosts/
such URLs will even work but only on your computer.
The question is, therefore, not so much whether a URL is valid, but rather which URLs work and should be allowed in a particular context.
If you want to do URL validation there are a lot of details and edge cases that are easy to overlook:
http://user:password@www.example.com/
.http://www.stackoverflow.com.
). [-0-9a-zA-z]
is definitely no longer sufficient.co.uk
and many others).Which of these limitations and rules apply is a question of project requirements and taste.
I have recently written a URL validator for a web app that is suitable for user-supplied URLs in forums, social networks, or the like. Feel free to use it as a base for your own one:
I have also written a blog post The Gory Details of URL Validation with more in-depth information.
Rather than using a regular expression, I would recommend making use of an anchor element.
when you set the href
property of an anchor
, various other properties are set.
var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port; // => "3000"
parser.pathname; // => "/pathname/"
parser.search; // => "?search=test"
parser.hash; // => "#hash"
parser.host; // => "example.com:3000"
source
However, if the value href
is bound to is not a valid url, then the value of those auxiliary properties will be the empty string.
Edit: as pointed out in the comments: if an invalid url is used, the properties of the current URL may be substituted.
So, as long as you're not passing in the URL of the current page, you can do something like:
function isValidURL(str) {
var a = document.createElement('a');
a.href = str;
return (a.host && a.host != window.location.host);
}
I am using below function to validate URL with or without http/https
:
function isValidURL(string) {
var res = string.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g);
return (res !== null)
};
var testCase1 = "http://en.wikipedia.org/wiki/Procter_&_Gamble";
console.log(isValidURL(testCase1)); // return true
var testCase2 = "http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707";
console.log(isValidURL(testCase2)); // return true
var testCase3 = "https://sdfasd";
console.log(isValidURL(testCase3)); // return false
var testCase4 = "dfdsfdsfdfdsfsdfs";
console.log(isValidURL(testCase4)); // return false
var testCase5 = "magnet:?xt=urn:btih:123";
console.log(isValidURL(testCase5)); // return false
var testCase6 = "https://stackoverflow.com/";
console.log(isValidURL(testCase6)); // return true
var testCase7 = "https://w";
console.log(isValidURL(testCase7)); // return false
var testCase8 = "https://sdfasdp.ppppppppppp";
console.log(isValidURL(testCase8)); // return false
If you need to also support https://localhost:3000
then use this modified version of [Devshed]s regex.
function isURL(url) {
if(!url) return false;
var pattern = new RegExp('^(https?:\\/\\/)?'+ // protocol
'((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|'+ // domain name
'((\\d{1,3}\\.){3}\\d{1,3}))|' + // OR ip (v4) address
'localhost' + // OR localhost
'(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*'+ // port and path
'(\\?[;&a-z\\d%_.~+=-]*)?'+ // query string
'(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
return pattern.test(url);
}
Use validator.js
ES6
import isURL from 'validator/lib/isURL'
isURL(string)
No ES6
var validator = require('validator');
validator.isURL(string)
You can also fine tune this function's behavior by passing optional options
object as the second argument of isURL
Here is the default options
object:
let options = {
protocols: [
'http',
'https',
'ftp'
],
require_tld: true,
require_protocol: false,
require_host: true,
require_valid_protocol: true,
allow_underscores: false,
host_whitelist: false,
host_blacklist: false,
allow_trailing_dot: false,
allow_protocol_relative_urls: false,
disallow_auth: false
}
isURL(string, options)
host_whitelist
and host_blacklist
can be arrays of hosts. They also support regular expressions.
let options = {
host_blacklist: ['foo.com', 'bar.com'],
}
isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true
isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false
options = {
host_blacklist: ['bar.com', 'foo.com', /\.foo\.com$/],
}
isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true
isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false
isURL('http://images.foo.com/', options) // => false
isURL('http://cdn.foo.com/', options) // => false
isURL('http://a.b.c.foo.com/', options) // => false
This seems to be one of the hardest problems in CS ;)
Here's another incomplete solution that works well enough for me and better than the others I've seen here. I'm using a input[type=url] for this in order to support IE11, otherwise it would be much simpler using window.URL to perform the validation instead:
const ipv4Regex = /^(\d{1,3}\.){3}\d{1,3}$/;
function isValidIpv4(ip) {
if (!ipv4Regex.test(ip)) return false;
return !ip.split('.').find(n => n > 255);
}
const domainRegex = /(?:[a-z0-9-]{1,63}\.){1,125}[a-z]{2,63}$/i;
function isValidDomain(domain) {
return isValidIpv4(domain) || domainRegex.test(domain);
}
let input;
function validateUrl(url) {
if (! /^https?:\/\//.test(url)) url = `http://${url}`; // assuming Babel is used
// to support IE11 we'll resort to input[type=url] instead of window.URL:
// try { return isValidDomain(new URL(url).host) && url; } catch(e) { return false; }
if (!input) { input = document.createElement('input'); input.type = 'url'; }
input.value = url;
if (! input.validity.valid) return false;
const domain = url.split(/^https?:\/\//)[1].split('/')[0].split('@').pop();
return isValidDomain(domain) && url;
}
console.log(validateUrl('google'), // false
validateUrl('user:pw@mydomain.com'),
validateUrl('https://google.com'),
validateUrl('100.100.100.100/abc'),
validateUrl('100.100.100.256/abc')); // false
In order to accept incomplete inputs such as "www.mydomain.com" it will also make it valid assuming the protocol is "http" in those cases and returning the valid URL if the address is valid. It returns false when invalid.
It also supports IPv4 domains, but not IPv6.