Extract hostname name from string

后端 未结 28 1501
情歌与酒
情歌与酒 2020-11-22 07:15

I would like to match just the root of a URL and not the whole URL from a text string. Given:

http://www.youtube.co         


        
相关标签:
28条回答
  • 2020-11-22 08:06

    A neat trick without using regular expressions:

    // Let the browser parse the URL by assigning it to an anchor element.
    const tmp = document.createElement('a');
    tmp.href = "http://www.example.com/12xy45";

    // tmp.hostname is now 'www.example.com'.
    // tmp.host is the hostname plus the port, but the port is only included when
    // it is not the scheme's default: it is 'www.example.com' here, and would be
    // 'www.example.com:8080' for "http://www.example.com:8080/12xy45".
    

    Wrap the above in a function such as the one below, and you have a simple way of extracting the domain part of a URI.

    /**
     * Returns the hostname of a URL by letting an anchor element parse it.
     *
     * @param {string} data - URL text; it is assigned to the anchor's `href`.
     * @returns {string} The anchor's `hostname` after the assignment.
     */
    function url_domain(data) {
      const anchor = Object.assign(document.createElement('a'), { href: data });
      return anchor.hostname;
    }
    
    0 讨论(0)
  • 2020-11-22 08:06

    I personally researched a lot for this solution, and the best one I could find is actually from CloudFlare's "browser check":

    /**
     * Returns the host (hostname plus any port) that a root-relative link
     * resolves to.
     *
     * A detached <div> containing <a href='/'> is created so that the link's
     * `href` can be read back as a resolved URL; the leading scheme and the final
     * character (the trailing "/") are then stripped from it.
     *
     * @returns {string} Host of the resolved URL, or "" when the resolved URL
     *   does not start with http:// or https://.
     */
    function getHostname() {
      // Locals are declared explicitly: assigning to an undeclared name creates a
      // global in sloppy mode and throws in strict mode / ES modules.
      const container = document.createElement('div');
      container.innerHTML = "<a href='/'>x</a>";
      const rootUrl = container.firstChild.href;

      const scheme = rootUrl.match(/^https?:\/\//);
      if (scheme === null) {
        return '';
      }
      // Skip the scheme at the front and drop the last character (the "/").
      return rootUrl.slice(scheme[0].length, -1);
    }
    
    // Example invocation; the returned string is discarded here.
    getHostname();
    

    I renamed the variables so that it is more human-readable, but it does the job better than expected.

    0 讨论(0)
  • 2020-11-22 08:07
    // Fallback for engines that lack String.prototype.trim: strips leading and
    // trailing whitespace. The native method is left untouched when it exists.
    if (typeof String.prototype.trim !== "function") {
        String.prototype.trim = function () {
            return this.replace(/^\s+|\s+$/g, "");
        };
    }
    /**
     * Extracts the authority part (host plus optional ":port") from a URL.
     *
     * Accepts absolute URLs ("scheme://host/...") and protocol-relative URLs
     * ("//host/..."). Anything else yields "".
     *
     * @param {string} url - URL text; surrounding whitespace is ignored.
     * @returns {string} The text after the "//" up to the first "/", "?", "&"
     *   or "#", or "" when `url` is not a string, is blank, or has neither a
     *   scheme nor a leading "//".
     */
    function getHost(url){
        if (typeof url !== "string") return "";
        const text = url.trim();
        if (text === "") return "";

        const parts = text.split("://");
        let authority;
        if (parts.length > 1 && !/[\/.?&]/.test(parts[0])) {
            // Ordinary "scheme://rest": the authority starts right after the scheme.
            authority = parts[1];
        } else if (text.indexOf("//") === 0) {
            // Protocol-relative URL. If a "://" occurs later on (for example inside
            // a query string), only the text in front of it is considered.
            authority = parts[0].slice(2);
        } else {
            return "";
        }
        // "#" is a delimiter too, so "http://a.com#top" yields "a.com".
        return authority.split(/[\/?&#]/)[0].trim();
    }
    /**
     * Returns the hostname of a URL: the result of getHost() without any
     * "user:password@" prefix and without the ":port" suffix.
     *
     * @param {string} url - URL text; surrounding whitespace is ignored.
     * @returns {string} The hostname (a bracketed IPv6 literal keeps its
     *   brackets), or "" when `url` is null, undefined or blank.
     */
    function getHostname(url){
        if (url == null) return "";
        const trimmed = url.trim();
        if (trimmed === "") return "";

        const authority = getHost(trimmed);
        // Drop the userinfo part, which ends at the last "@".
        const hostAndPort = authority.slice(authority.lastIndexOf("@") + 1);

        // An IPv6 literal contains ":" itself, so it must be cut at the closing
        // bracket rather than at the first colon.
        if (hostAndPort.charAt(0) === "[") {
            const closing = hostAndPort.indexOf("]");
            if (closing !== -1) return hostAndPort.slice(0, closing + 1);
        }
        return hostAndPort.split(":")[0];
    }
    /**
     * Returns the hostname of a URL with its leftmost label removed, e.g.
     * "www.example.com" becomes "example.com".
     *
     * The first label is removed unconditionally whenever the hostname contains
     * a dot, so "example.com" becomes "com" and a dotted IPv4 address loses its
     * first octet. A hostname without a dot is returned unchanged.
     *
     * @param {string} url - URL text; surrounding whitespace is ignored.
     * @returns {string} The shortened hostname, or "" when `url` is null,
     *   undefined or blank.
     */
    function getDomain(url){
        if (url == null) return "";
        const trimmed = url.trim();
        if (trimmed === "") return "";
        // Anchored, with a literal dot, and allowing "-" inside the label, so that
        // exactly the first label and its separator are removed.
        return getHostname(trimmed).replace(/^[a-zA-Z0-9-]+\./, "");
    }
    
    0 讨论(0)
  • 2020-11-22 08:07
    /**
     * Extracts the host from a URL-like string, without a leading "www.".
     *
     * The scheme ("https://") or a bare "//" prefix is optional, so both
     * "https://example.com/x" and "example.com/x" yield "example.com". A port,
     * if present, is kept.
     *
     * @param {string} url - Absolute, protocol-relative or scheme-less URL.
     * @returns {string} Everything after the optional scheme and "www." up to
     *   the first "/", "?" or "#".
     */
    function getHostName(url) {
      // Only a prefix at the very start is removed; a "//" further along (for
      // example inside a query string) must not be mistaken for the scheme.
      const withoutScheme = url.replace(/^(?:[a-z][a-z\d+.-]*:)?\/\//i, '');
      const withoutWww = withoutScheme.replace(/^www\./i, '');
      return withoutWww.split(/[\/?#]/)[0];
    }

    console.log(getHostName('https://stackoverflow.com/questions')); // stackoverflow.com

    console.log(getHostName('stackoverflow.com/questions')); // stackoverflow.com
    
    0 讨论(0)
  • 2020-11-22 08:08

    2020 answer

    You don't need any extra dependencies for this! Depending on whether you need to optimize for performance or not, there are two good solutions:

    1. Use URL.hostname for readability

    In the Babel era, the cleanest and easiest solution is to use URL.hostname.

    /**
     * Returns the hostname of an absolute URL using the built-in URL parser.
     *
     * @param {string} url - Absolute URL to parse.
     * @returns {string} The parsed URL's `hostname`.
     * @throws {TypeError} When the URL constructor cannot parse `url`.
     */
    const getHostname = (url) => new URL(url).hostname;

    // tests
    for (const sample of [
      "https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/",
      "https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname",
    ]) {
      console.log(getHostname(sample));
    }

    URL.hostname is part of the URL API, supported by all major browsers except IE (caniuse). Use a URL polyfill if you need to support legacy browsers.

    Using this solution will also give you access to other URL properties and methods. This will be useful if you also want to extract the URL's pathname or query string params, for example.


    2. Use RegEx for performance

    URL.hostname is faster than using the anchor solution or parseUri. However it's still much slower than gilly3's regex:

    /**
     * Extracts the hostname from an http(s) URL with a single regular expression.
     *
     * Like URL.hostname, the result excludes any "user:password@" prefix and any
     * ":port" suffix; a bracketed IPv6 literal keeps its brackets.
     *
     * @param {string} url - URL text that must start with "http://" or "https://".
     * @returns {?string} The hostname, or null when `url` does not match.
     */
    const getHostnameFromRegex = (url) => {
      // Optional userinfo is skipped; the capture is either a bracketed IPv6
      // literal or everything up to the first ":", "/", "?" or "#".
      const matches = url.match(/^https?:\/\/(?:[^\/?#@]*@)?(\[[^\]\/?#]*\]|[^\/?#:]+)/i);
      // extract hostname (null if no match is found)
      return matches === null ? null : matches[1];
    };

    // tests
    console.log(getHostnameFromRegex("https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string/"));
    console.log(getHostnameFromRegex("https://developer.mozilla.org/en-US/docs/Web/API/URL/hostname"));

    Test it yourself on this jsPerf


    TL;DR

    If you need to process a very large number of URLs (where performance would be a factor), use RegEx. Otherwise, use URL.hostname.

    0 讨论(0)
  • 2020-11-22 08:10

    My code looks like this. Regular expressions can come in many forms; here is mine, along with my test cases. I think this approach is more scalable.

    /**
     * Parses a URL-like string with a single regular expression.
     *
     * The pattern is anchored at both ends, so the whole string must match.
     *
     * @param {string} url - Text to parse.
     * @returns {?Object} The named capture groups of the match (`protocol`,
     *   `host`, `port`; a group that did not take part in the match is
     *   undefined), or null when `url` does not match the pattern.
     */
    function extractUrlInfo(url){
      const reg = /^((?<protocol>http[s]?):\/\/)?(?<host>((\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])|[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)))(\:(?<port>[0-9]|[1-9]\d|[1-9]\d{2}|[1-9]\d{3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$/;
      const match = reg.exec(url);
      // exec() returns null when nothing matches; reading .groups from null would throw.
      return match === null ? null : match.groups;
    }

    let url = "https://192.168.1.1:1234";
    console.log(extractUrlInfo(url));
    url = "https://stackoverflow.com/questions/8498592/extract-hostname-name-from-string";
    console.log(extractUrlInfo(url));

    0 讨论(0)
提交回复
热议问题