I would like to match just the root of a URL and not the whole URL from a text string. Given:
http://www.youtube.co
// use this if you know you have a subdomain
// www.domain.com -> domain.com
function getDomain() {
return window.location.hostname.replace(/([a-zA-Z0-9]+.)/,"");
}
Code:
var regex = /\w+.(com|co\.kr|be)/ig;
var urls = ['http://www.youtube.com/watch?v=ClkQA2Lb_iE',
'http://youtu.be/ClkQA2Lb_iE',
'http://www.example.com/12xy45',
'http://example.com/random'];
$.each(urls, function(index, url) {
var convertedUrl = url.match(regex);
console.log(convertedUrl);
});
Result:
youtube.com
youtu.be
example.com
example.com
This solution gives your answer plus additional properties. No JQuery or other dependencies required, paste and go.
Usage
getUrlParts("https://news.google.com/news/headlines/technology.html?ned=us&hl=en")
Output
{
"origin": "https://news.google.com",
"domain": "news.google.com",
"subdomain": "news",
"domainroot": "google.com",
"domainpath": "news.google.com/news/headlines",
"tld": ".com",
"path": "news/headlines/technology.html",
"query": "ned=us&hl=en",
"protocol": "https",
"port": 443,
"parts": [
"news",
"google",
"com"
],
"segments": [
"news",
"headlines",
"technology.html"
],
"params": [
{
"key": "ned",
"val": "us"
},
{
"key": "hl",
"val": "en"
}
]
}
Code
The code is designed to be easy to understand rather than super fast. It can be called easily 100 times per second, so it's great for front end or a few server usages, but not for high volume throughput.
function getUrlParts(fullyQualifiedUrl) {
var url = {},
tempProtocol
var a = document.createElement('a')
// if doesn't start with something like https:// it's not a url, but try to work around that
if (fullyQualifiedUrl.indexOf('://') == -1) {
tempProtocol = 'https://'
a.href = tempProtocol + fullyQualifiedUrl
} else
a.href = fullyQualifiedUrl
var parts = a.hostname.split('.')
url.origin = tempProtocol ? "" : a.origin
url.domain = a.hostname
url.subdomain = parts[0]
url.domainroot = ''
url.domainpath = ''
url.tld = '.' + parts[parts.length - 1]
url.path = a.pathname.substring(1)
url.query = a.search.substr(1)
url.protocol = tempProtocol ? "" : a.protocol.substr(0, a.protocol.length - 1)
url.port = tempProtocol ? "" : a.port ? a.port : a.protocol === 'http:' ? 80 : a.protocol === 'https:' ? 443 : a.port
url.parts = parts
url.segments = a.pathname === '/' ? [] : a.pathname.split('/').slice(1)
url.params = url.query === '' ? [] : url.query.split('&')
for (var j = 0; j < url.params.length; j++) {
var param = url.params[j];
var keyval = param.split('=')
url.params[j] = {
'key': keyval[0],
'val': keyval[1]
}
}
// domainroot
if (parts.length > 2) {
url.domainroot = parts[parts.length - 2] + '.' + parts[parts.length - 1];
// check for country code top level domain
if (parts[parts.length - 1].length == 2 && parts[parts.length - 1].length == 2)
url.domainroot = parts[parts.length - 3] + '.' + url.domainroot;
}
// domainpath (domain+path without filenames)
if (url.segments.length > 0) {
var lastSegment = url.segments[url.segments.length - 1]
var endsWithFile = lastSegment.indexOf('.') != -1
if (endsWithFile) {
var fileSegment = url.path.indexOf(lastSegment)
var pathNoFile = url.path.substr(0, fileSegment - 1)
url.domainpath = url.domain
if (pathNoFile)
url.domainpath = url.domainpath + '/' + pathNoFile
} else
url.domainpath = url.domain + '/' + url.path
} else
url.domainpath = url.domain
return url
}
This is not a full answer, but the below code should help you:
function myFunction() {
var str = "https://www.123rf.com/photo_10965738_lots-oop.html";
matches = str.split('/');
return matches[2];
}
I would like some one to create code faster than mine. It help to improve my-self also.
Parse-Urls appears to be the JavaScript library with the most robust patterns
Here is a rundown of the features:
Chapter 1. Normalize or parse one URL
Chapter 2. Extract all URLs
Chapter 3. Extract URIs with certain names
Chapter 4. Extract all fuzzy URLs
Chapter 5. Highlight all URLs in texts
Chapter 6. Extract all URLs in raw HTML or XML
Okay, I know this is an old question, but I made a super-efficient url parser so I thought I'd share it.
As you can see, the structure of the function is very odd, but it's for efficiency. No prototype functions are used, the string doesn't get iterated more than once, and no character is processed more than necessary.
function getDomain(url) {
var dom = "", v, step = 0;
for(var i=0,l=url.length; i<l; i++) {
v = url[i]; if(step == 0) {
//First, skip 0 to 5 characters ending in ':' (ex: 'https://')
if(i > 5) { i=-1; step=1; } else if(v == ':') { i+=2; step=1; }
} else if(step == 1) {
//Skip 0 or 4 characters 'www.'
//(Note: Doesn't work with www.com, but that domain isn't claimed anyway.)
if(v == 'w' && url[i+1] == 'w' && url[i+2] == 'w' && url[i+3] == '.') i+=4;
dom+=url[i]; step=2;
} else if(step == 2) {
//Stop at subpages, queries, and hashes.
if(v == '/' || v == '?' || v == '#') break; dom += v;
}
}
return dom;
}