I am using the function below to match URLs inside a given text and replace them for HTML links. The regular expression is working great, but currently I am only replacing t
Try Below Solution
function replaceLinkClickableLink(url = '') {
let pattern = new RegExp('^(https?:\\/\\/)?'+
'((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.?)+[a-z]{2,}|'+
'((\\d{1,3}\\.){3}\\d{1,3}))'+
'(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*'+
'(\\?[;&a-z\\d%_.~+=-]*)?'+
'(\\#[-a-z\\d_]*)?$','i');
let isUrl = pattern.test(url);
if (isUrl) {
return `<a href="${url}" target="_blank">${url}</a>`;
}
return url;
}
Here's my solution:
var content = "Visit https://wwww.google.com or watch this video: https://www.youtube.com/watch?v=0T4DQYgsazo and news at http://www.bbc.com";
content = replaceUrlsWithLinks(content, "http://");
content = replaceUrlsWithLinks(content, "https://");
function replaceUrlsWithLinks(content, protocol) {
var startPos = 0;
var s = 0;
while (s < content.length) {
startPos = content.indexOf(protocol, s);
if (startPos < 0)
return content;
let endPos = content.indexOf(" ", startPos + 1);
if (endPos < 0)
endPos = content.length;
let url = content.substr(startPos, endPos - startPos);
if (url.endsWith(".") || url.endsWith("?") || url.endsWith(",")) {
url = url.substr(0, url.length - 1);
endPos--;
}
if (ROOTNS.utils.stringsHelper.validUrl(url)) {
let link = "<a href='" + url + "'>" + url + "</a>";
content = content.substr(0, startPos) + link + content.substr(endPos);
s = startPos + link.length;
} else {
s = endPos + 1;
}
}
return content;
}
function validUrl(url) {
try {
new URL(url);
return true;
} catch (e) {
return false;
}
}
I've made some small modifications to Travis's code (just to avoid any unnecessary redeclaration - but it's working great for my needs, so nice job!):
function linkify(inputText) {
var replacedText, replacePattern1, replacePattern2, replacePattern3;
//URLs starting with http://, https://, or ftp://
replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;
replacedText = inputText.replace(replacePattern1, '<a href="$1" target="_blank">$1</a>');
//URLs starting with "www." (without // before it, or it'd re-link the ones done above).
replacePattern2 = /(^|[^\/])(www\.[\S]+(\b|$))/gim;
replacedText = replacedText.replace(replacePattern2, '$1<a href="http://$2" target="_blank">$2</a>');
//Change email addresses to mailto:: links.
replacePattern3 = /(([a-zA-Z0-9\-\_\.])+@[a-zA-Z\_]+?(\.[a-zA-Z]{2,6})+)/gim;
replacedText = replacedText.replace(replacePattern3, '<a href="mailto:$1">$1</a>');
return replacedText;
}
If you need to show shorter link (only domain), but with same long URL, you can try my modification of Sam Hasler's code version posted above
function replaceURLWithHTMLLinks(text) {
var exp = /(\b(https?|ftp|file):\/\/([-A-Z0-9+&@#%?=~_|!:,.;]*)([-A-Z0-9+&@#%?\/=~_|!:,.;]*)[-A-Z0-9+&@#\/%=~_|])/ig;
return text.replace(exp, "<a href='$1' target='_blank'>$3</a>");
}
Keep it simple! Say what you cannot have, rather than what you can have :)
As mentioned above, URLs can be quite complex, especially after the '?', and not all of them start with a 'www.' e.g. maps.bing.com/something?key=!"£$%^*()&lat=65&lon&lon=20
So, rather than have a complex regex that wont meet all edge cases, and will be hard to maintain, how about this much simpler one, which works well for me in practise.
Match
http(s):// (anything but a space)+
www. (anything but a space)+
Where 'anything' is [^'"<>\s]
... basically a greedy match, carrying on to you meet a space, quote, angle bracket, or end of line
Also:
Remember to check that it is not already in URL format, e.g. the text contains href="..."
or src="..."
Add ref=nofollow (if appropriate)
This solution isn't as "good" as the libraries mentioned above, but is much simpler, and works well in practise.
if html.match( /(href)|(src)/i )) {
return html; // text already has a hyper link in it
}
html = html.replace(
/\b(https?:\/\/[^\s\(\)\'\"\<\>]+)/ig,
"<a ref='nofollow' href='$1'>$1</a>"
);
html = html.replace(
/\s(www\.[^\s\(\)\'\"\<\>]+)/ig,
"<a ref='nofollow' href='http://$1'>$1</a>"
);
html = html.replace(
/^(www\.[^\s\(\)\'\"\<\>]+)/ig,
"<a ref='nofollow' href='http://$1'>$1</a>"
);
return html;
After input from several sources I've now a solution that works well. It had to do with writing your own replacement code.
Answer.
Fiddle.
function replaceURLWithHTMLLinks(text) {
var re = /(\(.*?)?\b((?:https?|ftp|file):\/\/[-a-z0-9+&@#\/%?=~_()|!:,.;]*[-a-z0-9+&@#\/%=~_()|])/ig;
return text.replace(re, function(match, lParens, url) {
var rParens = '';
lParens = lParens || '';
// Try to strip the same number of right parens from url
// as there are left parens. Here, lParenCounter must be
// a RegExp object. You cannot use a literal
// while (/\(/g.exec(lParens)) { ... }
// because an object is needed to store the lastIndex state.
var lParenCounter = /\(/g;
while (lParenCounter.exec(lParens)) {
var m;
// We want m[1] to be greedy, unless a period precedes the
// right parenthesis. These tests cannot be simplified as
// /(.*)(\.?\).*)/.exec(url)
// because if (.*) is greedy then \.? never gets a chance.
if (m = /(.*)(\.\).*)/.exec(url) ||
/(.*)(\).*)/.exec(url)) {
url = m[1];
rParens = m[2] + rParens;
}
}
return lParens + "<a href='" + url + "'>" + url + "</a>" + rParens;
});
}