Detect URLs in text with JavaScript

后端 未结 13 2014
孤城傲影
孤城傲影 2020-11-22 06:23

Does anyone have suggestions for detecting URLs in a set of strings?

arrayOfStrings.forEach(function(string){
  // detect URLs in strings and do something sw         


        
相关标签:
13条回答
  • Here is what I ended up using as my regex:

    var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
    

    This doesn't include trailing punctuation in the URL. Crescent's function works like a charm :) so:

    function linkify(text) {
        var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/ig;
        return text.replace(urlRegex, function(url) {
            return '<a href="' + url + '">' + url + '</a>';
        });
    }
    
    0 讨论(0)
  • 2020-11-22 07:10

    This library on NPM looks like it is pretty comprehensive https://www.npmjs.com/package/linkifyjs

    Linkify is a small yet comprehensive JavaScript plugin for finding URLs in plain-text and converting them to HTML links. It works with all valid URLs and email addresses.

    0 讨论(0)
  • 2020-11-22 07:10
    let str = 'https://example.com is a great site'
    str.replace(/(https?:\/\/[^\s]+)/g,"<a href='$1' target='_blank' >$1</a>")
    

    Short Code Big Work!...

    Result:-

     <a href="https://example.com" target="_blank" > https://example.com </a>
    
    0 讨论(0)
  • 2020-11-22 07:13

    Based on Crescent Fresh answer

    if you want to detect links with http:// OR without http:// and by www. you can use the following

    function urlify(text) {
        var urlRegex = /(((https?:\/\/)|(www\.))[^\s]+)/g;
        //var urlRegex = /(https?:\/\/[^\s]+)/g;
        return text.replace(urlRegex, function(url,b,c) {
            var url2 = (c == 'www.') ?  'http://' +url : url;
            return '<a href="' +url2+ '" target="_blank">' + url + '</a>';
        }) 
    }
    
    0 讨论(0)
  • 2020-11-22 07:14

    You can use a regex like this to extract normal url patterns.

    (https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})
    

    If you need more sophisticated patterns, use a library like this.

    https://www.npmjs.com/package/pattern-dreamer

    0 讨论(0)
  • 2020-11-22 07:16

    try this:

    function isUrl(s) {
        if (!isUrl.rx_url) {
            // taken from https://gist.github.com/dperini/729294
            isUrl.rx_url=/^(?:(?:https?|ftp):\/\/)?(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?$/i;
            // valid prefixes
            isUrl.prefixes=['http:\/\/', 'https:\/\/', 'ftp:\/\/', 'www.'];
            // taken from https://w3techs.com/technologies/overview/top_level_domain/all
            isUrl.domains=['com','ru','net','org','de','jp','uk','br','pl','in','it','fr','au','info','nl','ir','cn','es','cz','kr','ua','ca','eu','biz','za','gr','co','ro','se','tw','mx','vn','tr','ch','hu','at','be','dk','tv','me','ar','no','us','sk','xyz','fi','id','cl','by','nz','il','ie','pt','kz','io','my','lt','hk','cc','sg','edu','pk','su','bg','th','top','lv','hr','pe','club','rs','ae','az','si','ph','pro','ng','tk','ee','asia','mobi'];
        }
    
        if (!isUrl.rx_url.test(s)) return false;
        for (let i=0; i<isUrl.prefixes.length; i++) if (s.startsWith(isUrl.prefixes[i])) return true;
        for (let i=0; i<isUrl.domains.length; i++) if (s.endsWith('.'+isUrl.domains[i]) || s.includes('.'+isUrl.domains[i]+'\/') ||s.includes('.'+isUrl.domains[i]+'?')) return true;
        return false;
    }
    
    function isEmail(s) {
        if (!isEmail.rx_email) {
            // taken from http://stackoverflow.com/a/16016476/460084
            var sQtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
            var sDtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
            var sAtom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
            var sQuotedPair = '\\x5c[\\x00-\\x7f]';
            var sDomainLiteral = '\\x5b(' + sDtext + '|' + sQuotedPair + ')*\\x5d';
            var sQuotedString = '\\x22(' + sQtext + '|' + sQuotedPair + ')*\\x22';
            var sDomain_ref = sAtom;
            var sSubDomain = '(' + sDomain_ref + '|' + sDomainLiteral + ')';
            var sWord = '(' + sAtom + '|' + sQuotedString + ')';
            var sDomain = sSubDomain + '(\\x2e' + sSubDomain + ')*';
            var sLocalPart = sWord + '(\\x2e' + sWord + ')*';
            var sAddrSpec = sLocalPart + '\\x40' + sDomain; // complete RFC822 email address spec
            var sValidEmail = '^' + sAddrSpec + '$'; // as whole string
    
            isEmail.rx_email = new RegExp(sValidEmail);
        }
    
        return isEmail.rx_email.test(s);
    }
    

    will also recognize urls such as google.com , http://www.google.bla , http://google.bla , www.google.bla but not google.bla

    0 讨论(0)
提交回复
热议问题