REGEX: Capture Filename from URL without file extension

前端 未结 5 1271
忘掉有多难
忘掉有多难 2020-12-01 05:40

I am trying to create a JavaScript regex that captures the filename without the file extension. I have read the other posts here and 'goto this page: http://gunbl…

相关标签:
5条回答
  • 2020-12-01 06:13

    You can try this regex :

    ([^/]*)\.[^.]*$
    
    0 讨论(0)
  • 2020-12-01 06:16

    Tested and works, even for pages without a file extension.

    // Capture group 1 is the run of word characters and hyphens that begins
    // right after the last slash or backslash; the rest of the string (an
    // optional dot followed by any non-slash characters) is matched but not
    // captured.
    // Limitation: the capture stops at the first character outside [\w-], so
    // "uri%20url-parsing.html" yields "uri" and "foo.global.js" yields "foo".
    var re = /([\w\d_-]*)\.?[^\\\/]*$/i;
    
    // URL whose last segment has no extension: the whole segment is captured.
    var url = "http://stackoverflow.com/questions/3671522/regex-capture-filename-from-url-without-file-extention";
    alert(url.match(re)[1]); // 'regex-capture-filename-from-url-without-file-extention'
    
    // URL whose last segment has an extension: only the name part is captured.
    url = 'http://gunblad3.blogspot.com/2008/05/uri-url-parsing.html';
    alert(url.match(re)[1]); // 'uri-url-parsing'
    

    ([\w\d_-]*) get a string containing letters, digits, underscores or hyphens.
    \.? perhaps the string is followed by a period.
    [^\\\/]*$ but certainly not followed by a slash or backslash till the very end.
    /i oh yeah, ignore case.

    0 讨论(0)
  • 2020-12-01 06:28

    Try this regex. It can even handle filenames with multiple periods.

    (?<=\/)[^\/]*(?=\.\w+$)
    
    0 讨论(0)
  • 2020-12-01 06:35
    var url = "http://example.com/index.htm";
    // match() returns null when the URL does not end in ".ext", so check the
    // result before indexing it; filename is then null instead of the lookup
    // throwing a TypeError.
    var extMatch = url.match(/([^\/]+)(?=\.\w+$)/);
    var filename = extMatch ? extMatch[0] : null;
    

    Let's go through the regular expression:

    [^\/]+    # one or more characters that aren't a slash
    (?=       # open a positive lookahead assertion
      \.      # a literal dot character
      \w+     # one or more word characters
      $       # end of string boundary
    )         # end of the lookahead
    

    This expression will collect all characters that aren't a slash that are immediately followed (thanks to the lookahead) by an extension and the end of the string -- or, in other words, everything after the last slash and until the extension.

    Alternatively, you can do this without regular expressions altogether, by finding the position of the last / and the last . using lastIndexOf and getting a substring between those points:

    var url = "http://example.com/index.htm";
    var nameStart = url.lastIndexOf("/") + 1;
    var lastDot = url.lastIndexOf(".");
    // Only treat the last dot as the extension separator when it lies inside
    // the last path segment. Otherwise (e.g. "http://example.com/index") the
    // dot belongs to the host, and substring() would silently swap its
    // arguments and return the wrong slice.
    var filename = url.substring(nameStart, lastDot > nameStart ? lastDot : url.length);
    
    0 讨论(0)
  • 2020-12-01 06:39

    I did not find any of the answers to be nearly robust enough. Here is my solution.

    /**
     * Extracts the file name from the last path segment of a URL or path.
     *
     * The name is everything in the last segment up to its first dot; the
     * extension is everything after that dot (so "foo.global.js" splits into
     * "foo" and "global.js"). A trailing query string ("?...") or fragment
     * ("#...") is ignored.
     *
     * @param {*} url - Value to parse. Anything falsy, or anything without a
     *     `match` method, yields null.
     * @param {boolean} [includeExtension] - When truthy, append the extension
     *     (if there is one) to the returned name.
     * @returns {?string} The file name, or null when `url` cannot be parsed.
     */
    function getFileName(url, includeExtension) {
        if (!url || typeof url.match !== "function")
            return null;

        // Group 1 is the name, group 2 the extension. "?" and "#" are excluded
        // from both groups and the optional tail consumes the query/fragment,
        // so slashes or dots inside them are never mistaken for path parts.
        var matches = url.match(/\/?([^/.?#]*)\.?([^/?#]*)(?:[?#][\s\S]*)?$/);
        if (!matches)
            return null;

        if (includeExtension && matches[2]) {
            return matches[1] + "." + matches[2];
        }
        return matches[1];
    }
    
    // Usage examples. The comment after each call shows the value that call
    // assigns to filename.

    // Absolute URL with a simple extension
    var url = "http://example.com/index.htm";
    var filename = getFileName(url);
    // index
    filename = getFileName(url, true);
    // index.htm
    
    // Bare file name with no path
    url = "index.htm";
    filename = getFileName(url);
    // index
    filename = getFileName(url, true);
    // index.htm
    
    // BGerrissen's examples: one URL without an extension, one with
    url = "http://stackoverflow.com/questions/3671522/regex-capture-filename-from-url-without-file-extention";
    filename = getFileName(url);
    // regex-capture-filename-from-url-without-file-extention
    filename = getFileName(url, true);
    // regex-capture-filename-from-url-without-file-extention
    
    url = "http://gunblad3.blogspot.com/2008/05/uri-url-parsing.html";
    filename = getFileName(url);
    // uri-url-parsing
    filename = getFileName(url, true);
    // uri-url-parsing.html
    
    // A case BGerrissen's regex fails on: percent-encoded characters in the name
    url = "http://gunblad3.blogspot.com/2008/05/uri%20url-parsing.html";
    filename = getFileName(url);
    // uri%20url-parsing
    filename = getFileName(url, true);
    // uri%20url-parsing.html
    
    // George Pantazis's case: multiple dots (everything after the first dot
    // counts as the extension)
    url = "http://gunblad3.blogspot.com/2008/05/foo.global.js";
    filename = getFileName(url);
    // foo
    filename = getFileName(url, true);
    // foo.global.js
    
    // Edge cases: inputs that are not strings
    url = {};
    filename = getFileName(url);
    // null
    url = null;
    filename = getFileName(url);
    // null
    

    To fit with the original question, the default behavior is to exclude the extension, but that can easily be reversed.

    0 讨论(0)
提交回复
热议问题