I am trying to create a Javascript Regex that captures the filename without the file extension. I have read the other posts here and \'goto this page: http://gunbl
You can try this regex :
([^/]*)\.[^.]*$
tested and works, even for pages without file extension.
var re = /([\w\d_-]*)\.?[^\\\/]*$/i;
var url = "http://stackoverflow.com/questions/3671522/regex-capture-filename-from-url-without-file-extention";
alert(url.match(re)[1]); // 'regex-capture-filename-from-url-without-file-extention'
url = 'http://gunblad3.blogspot.com/2008/05/uri-url-parsing.html';
alert(url.match(re)[1]); // 'uri-url-parsing'
([\w\d_-]*)
get a string containing letters, digits, underscores or hyphens.
\.?
perhaps the string is followed by a period.
[^\\\/]*$
but certainly not followed by a slash or backslash till the very end.
/i
oh yeh, ignore case.
Try this regex. It can even handle filenames with multiple periods.
(?<=\/)[^\/]*(?=\.\w+$)
var url = "http://example.com/index.htm";
var filename = url.match(/([^\/]+)(?=\.\w+$)/)[0];
Let's go through the regular expression:
[^\/]+ # one or more character that isn't a slash
(?= # open a positive lookahead assertion
\. # a literal dot character
\w+ # one or more word characters
$ # end of string boundary
) # end of the lookahead
This expression will collect all characters that aren't a slash that are immediately followed (thanks to the lookahead) by an extension and the end of the string -- or, in other words, everything after the last slash and until the extension.
Alternately, you can do this without regular expressions altogether, by finding the position of the last /
and the last .
using lastIndexOf and getting a substring between those points:
var url = "http://example.com/index.htm";
var filename = url.substring(url.lastIndexOf("/") + 1, url.lastIndexOf("."));
I did not find any of the answers to be near robust enough. Here is my solution.
function getFileName(url, includeExtension) {
var matches = url && typeof url.match === "function" && url.match(/\/?([^/.]*)\.?([^/]*)$/);
if (!matches)
return null;
if (includeExtension && matches.length > 2 && matches[2]) {
return matches.slice(1).join(".");
}
return matches[1];
}
var url = "http://example.com/index.htm";
var filename = getFileName(url);
// index
filename = getFileName(url, true);
// index.htm
url = "index.htm";
filename = getFileName(url);
// index
filename = getFileName(url, true);
// index.htm
// BGerrissen's examples
url = "http://stackoverflow.com/questions/3671522/regex-capture-filename-from-url-without-file-extention";
filename = getFileName(url);
// regex-capture-filename-from-url-without-file-extention
filename = getFileName(url, true);
// regex-capture-filename-from-url-without-file-extention
url = "http://gunblad3.blogspot.com/2008/05/uri-url-parsing.html";
filename = getFileName(url);
// uri-url-parsing
filename = getFileName(url, true);
// uri-url-parsing.html
// BGerrissen fails
url = "http://gunblad3.blogspot.com/2008/05/uri%20url-parsing.html";
filename = getFileName(url);
// uri%20url-parsing
filename = getFileName(url, true);
// uri%20url-parsing.html
// George Pantazis multiple dots
url = "http://gunblad3.blogspot.com/2008/05/foo.global.js";
filename = getFileName(url);
// foo
filename = getFileName(url, true);
// foo.global.js
// Fringe cases
url = {};
filename = getFileName(url);
// null
url = null;
filename = getFileName(url);
// null
To fit with the original question, the default behavior is to exclude the extension, but that can easily be reversed.