Extract hostname from string

Asked by 情歌与酒 on 2020-11-22 07:15

I would like to match just the root of a URL from a text string, not the whole URL. Given:

http://www.youtube.co         


        
28 Answers
  • 2020-11-22 07:58
    // use this if you know you have a subdomain
    // www.domain.com -> domain.com
    function getDomain() {
      // strip the first label and its dot; the dot must be escaped ("\.") or it matches any character
      return window.location.hostname.replace(/^[a-zA-Z0-9-]+\./, "");
    }
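
    For illustration, a hypothetical usage sketch: on a page served from https://www.example.com/page, window.location.hostname is "www.example.com", so:

    getDomain();  // -> "example.com"
    // Note: on a host with no subdomain (e.g. "example.com") the replace still strips the
    // first label and returns "com" - hence "use this if you know you have a subdomain".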
    
  • 2020-11-22 08:01

    Code:

    // the dot is escaped ("\.") so it only matches a literal "."; extend the TLD list as needed
    var regex = /\w+\.(com|co\.kr|be)/ig;
    var urls = ['http://www.youtube.com/watch?v=ClkQA2Lb_iE',
                'http://youtu.be/ClkQA2Lb_iE',
                'http://www.example.com/12xy45',
                'http://example.com/random'];


    urls.forEach(function(url) {
        var convertedUrl = url.match(regex);   // match() returns an array of matches
        console.log(convertedUrl[0]);          // first match is the bare domain
    });
    

    Result:

    youtube.com
    youtu.be
    example.com
    example.com
    
  • 2020-11-22 08:02

    All URL properties, no dependencies, no jQuery, easy to understand

    This solution gives the answer you asked for plus additional properties. No jQuery or other dependencies required; just paste and go.

    Usage

    getUrlParts("https://news.google.com/news/headlines/technology.html?ned=us&hl=en")
    

    Output

    {
      "origin": "https://news.google.com",
      "domain": "news.google.com",
      "subdomain": "news",
      "domainroot": "google.com",
      "domainpath": "news.google.com/news/headlines",
      "tld": ".com",
      "path": "news/headlines/technology.html",
      "query": "ned=us&hl=en",
      "protocol": "https",
      "port": 443,
      "parts": [
        "news",
        "google",
        "com"
      ],
      "segments": [
        "news",
        "headlines",
        "technology.html"
      ],
      "params": [
        {
          "key": "ned",
          "val": "us"
        },
        {
          "key": "hl",
          "val": "en"
        }
      ]
    }
    

    Code
    The code is designed to be easy to understand rather than super fast. It can easily be called 100 times per second, so it's fine for front-end use or light server-side use, but not for high-volume throughput. Note that it relies on document.createElement, so it needs a DOM to run.

    function getUrlParts(fullyQualifiedUrl) {
        var url = {},
            tempProtocol
        var a = document.createElement('a')
        // if it doesn't start with something like 'https://', it isn't a full URL, but try to work around that
        if (fullyQualifiedUrl.indexOf('://') == -1) {
            tempProtocol = 'https://'
            a.href = tempProtocol + fullyQualifiedUrl
        } else
            a.href = fullyQualifiedUrl
        var parts = a.hostname.split('.')
        url.origin = tempProtocol ? "" : a.origin
        url.domain = a.hostname
        url.subdomain = parts[0]
        url.domainroot = ''
        url.domainpath = ''
        url.tld = '.' + parts[parts.length - 1]
        url.path = a.pathname.substring(1)
        url.query = a.search.substr(1)
        url.protocol = tempProtocol ? "" : a.protocol.substr(0, a.protocol.length - 1)
        url.port = tempProtocol ? "" : a.port ? a.port : a.protocol === 'http:' ? 80 : a.protocol === 'https:' ? 443 : a.port
        url.parts = parts
        url.segments = a.pathname === '/' ? [] : a.pathname.split('/').slice(1)
        url.params = url.query === '' ? [] : url.query.split('&')
        for (var j = 0; j < url.params.length; j++) {
            var param = url.params[j];
            var keyval = param.split('=')
            url.params[j] = {
                'key': keyval[0],
                'val': keyval[1]
            }
        }
        // domainroot
        if (parts.length > 2) {
            url.domainroot = parts[parts.length - 2] + '.' + parts[parts.length - 1];
            // check for country code top level domain (e.g. ".co.uk": the last two labels are both 2 chars)
            if (parts[parts.length - 1].length == 2 && parts[parts.length - 2].length == 2)
                url.domainroot = parts[parts.length - 3] + '.' + url.domainroot;
        }
        // domainpath (domain+path without filenames) 
        if (url.segments.length > 0) {
            var lastSegment = url.segments[url.segments.length - 1]
            var endsWithFile = lastSegment.indexOf('.') != -1
            if (endsWithFile) {
                var fileSegment = url.path.indexOf(lastSegment)
                var pathNoFile = url.path.substr(0, fileSegment - 1)
                url.domainpath = url.domain
                if (pathNoFile)
                    url.domainpath = url.domainpath + '/' + pathNoFile
            } else
                url.domainpath = url.domain + '/' + url.path
        } else
            url.domainpath = url.domain
        return url
    }
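
    As a side note (not part of the original answer): modern browsers and recent Node versions also ship a built-in URL constructor that covers several of these properties out of the box, though it does not compute domainroot or domainpath. A minimal sketch:

    // Minimal sketch using the standard URL API (modern browsers / recent Node)
    var u = new URL("https://news.google.com/news/headlines/technology.html?ned=us&hl=en");
    console.log(u.hostname);                 // "news.google.com"
    console.log(u.protocol);                 // "https:"
    console.log(u.pathname);                 // "/news/headlines/technology.html"
    console.log(u.searchParams.get("ned"));  // "us"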
    
  • 2020-11-22 08:02

    This is not a full answer, but the code below should help you:

    function myFunction() {
        var str = "https://www.123rf.com/photo_10965738_lots-oop.html";
        var matches = str.split('/');   // ["https:", "", "www.123rf.com", "photo_10965738_lots-oop.html"]
        return matches[2];              // "www.123rf.com"
    }
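
    For illustration only, a parameterised variant of the same idea (the name getHostFromUrl is made up here); it assumes the string really starts with a scheme such as "https://", so that index 2 of the split is the host:

    // Hypothetical generalisation of the snippet above:
    // "https://www.123rf.com/photo_10965738_lots-oop.html".split('/') gives
    // ["https:", "", "www.123rf.com", "photo_10965738_lots-oop.html"]
    function getHostFromUrl(url) {
        return url.split('/')[2];
    }

    getHostFromUrl("https://www.123rf.com/photo_10965738_lots-oop.html");  // "www.123rf.com"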
    

    I would welcome it if someone wrote faster code than mine; it would help me improve as well.

  • 2020-11-22 08:02

    Parse-Urls appears to be the JavaScript library with the most robust patterns

    Here is a rundown of the features:

    Chapter 1. Normalize or parse one URL

    Chapter 2. Extract all URLs

    Chapter 3. Extract URIs with certain names

    Chapter 4. Extract all fuzzy URLs

    Chapter 5. Highlight all URLs in texts

    Chapter 6. Extract all URLs in raw HTML or XML

  • 2020-11-22 08:04

    Okay, I know this is an old question, but I made a super-efficient URL parser, so I thought I'd share it.

    As you can see, the structure of the function is very odd, but it's for efficiency. No prototype functions are used, the string doesn't get iterated more than once, and no character is processed more than necessary.

    function getDomain(url) {
        var dom = "", v, step = 0;
        for (var i = 0, l = url.length; i < l; i++) {
            v = url[i];
            if (step == 0) {
                // First, skip 0 to 5 characters ending in ':' (ex: 'https://')
                if (i > 5) {
                    i = -1;     // no scheme found in the first 6 characters: restart from the beginning
                    step = 1;
                } else if (v == ':') {
                    i += 2;     // jump over the "//"
                    step = 1;
                }
            } else if (step == 1) {
                // Skip 0 or 4 characters 'www.'
                // (Note: Doesn't work with www.com, but that domain isn't claimed anyway.)
                if (v == 'w' && url[i + 1] == 'w' && url[i + 2] == 'w' && url[i + 3] == '.')
                    i += 4;
                dom += url[i];
                step = 2;
            } else if (step == 2) {
                // Stop at subpages, queries, and hashes.
                if (v == '/' || v == '?' || v == '#')
                    break;
                dom += v;
            }
        }
        return dom;
    }
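
    For example, run against the sample URLs used elsewhere in this thread:

    console.log(getDomain('http://www.youtube.com/watch?v=ClkQA2Lb_iE'));  // "youtube.com"
    console.log(getDomain('http://youtu.be/ClkQA2Lb_iE'));                 // "youtu.be"
    console.log(getDomain('http://www.example.com/12xy45'));               // "example.com"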
    