I am having a little bit of regex trouble.
I am trying to get the path (`videoplay`) from this URL:
http://video.google.co.uk:80/videoplay?docid=-
This expression captures `videoplay` and everything after it, i.e. the URL path together with the query string.
/\/(videoplay.+)/
This expression captures everything after the port, which also includes the path.
/:\d+\/(.+)/
However, if you are using Node.js, I recommend the native `url` module.
// Load Node's built-in `url` module.
var url = require('url')
// Sample URL containing a port, a query string and a fragment.
// NOTE(review): the variable is named youtubeUrl although the sample is a video.google.co.uk URL.
var youtubeUrl = "http://video.google.co.uk:80/videoplay?docid=-7246927612831078230&hl=en#hello"
// Split the URL into its components; the resulting object is listed below.
url.parse(youtubeUrl)
Which does all of the regex work for you.
{
protocol: 'http:',
slashes: true,
auth: null,
host: 'video.google.co.uk:80',
port: '80',
hostname: 'video.google.co.uk',
hash: '#hello',
search: '?docid=-7246927612831078230&hl=en',
query: 'docid=-7246927612831078230&hl=en',
pathname: '/videoplay',
path: '/videoplay?docid=-7246927612831078230&hl=en',
href: 'http://video.google.co.uk:80/videoplay?docid=-7246927612831078230&hl=en#hello'
}
(http[s]?:\/\/)?([^\/\s]+\/)(.*)
The path is in capture group 3.
Demo: http://regex101.com/r/vK4rV7/1
I think this is what you're after: [^/]+$
Demo: http://regex101.com/r/rG8gB9
You can try this:
^(?:[^/]*(?:/(?:/[^/]*/?)?)?([^?]+)(?:\??.+)?)$
([^?]+) above is the capturing group which returns your path.
Please note that this is not an all-URL regex. It just solves your problem of matching all the text between the first "/" occurring after "//" and the following "?" character.
If you need an all-matching regex, you can check this StackOverflow link where they have discussed and dissected all possibilities of a URI into its constituent parts, including your "path".
If you consider that an overkill AND if you know that your input URL will always follow a pattern of having your path between the first "/" and following "?", then the above regex should be sufficient.
/**
 * Extract the path component from an absolute or protocol-relative URL.
 *
 * The pattern looks for an optional scheme ("http:"), then "//", then the
 * authority (host and optional port), then the path. The path starts at the
 * first "/" after the authority and ends before the first "?" or "#".
 *
 * @param {string} url - URL to inspect, e.g. "http://host:80/a/b?x=1#frag".
 * @param {*} defaults - Value returned when `url` contains no path.
 * @returns {*} The path including its leading "/", or `defaults` when the
 *     URL has no path (e.g. "http://host" or "http://host?x=1").
 */
function getPath(url, defaults){
    // The authority must stop at "/", "?" or "#", and the path must begin
    // with "/". Otherwise the regex backtracks into the host name (or runs
    // into the query string) when the URL has no path at all.
    var reUrlPath = /(?:\w+:)?\/\/[^\/?#]+(\/[^?#]*)/;
    var match = url.match(reUrlPath);
    return match ? match[1] : defaults;
}
// Demo: alert the path extracted from a range of URL shapes (different
// schemes, protocol-relative, with query/fragment, with trailing slash,
// and a bare root path), using 'unknown' as the fallback value.
[
    'http://stackoverflow.com/q/123/regex-url',
    'https://stackoverflow.com/q/123/regex-url',
    '//stackoverflow.com/q/123/regex-url',
    'http://stackoverflow.com/q/123/regex-url?foo',
    'http://stackoverflow.com/q/123/regex-url#foo',
    'http://stackoverflow.com/q/123/regex-url/',
    'http://stackoverflow.com/q/123/regex-url/?foo',
    'http://stackoverflow.com/q/123/regex-url/#foo',
    'http://stackoverflow.com/'
].forEach(function (sampleUrl) {
    alert( getPath(sampleUrl, 'unknown') );
});
You mean a negative lookbehind? (?<!/)