How does Stack Overflow generate its SEO-friendly URLs?

后端 未结 21 1841
-上瘾入骨i
-上瘾入骨i 2020-11-22 04:27

What is a good complete regular expression or some other process that would take the title:

How do you change a title to be part of the URL like Stack Overflow?

相关标签:
21条回答
  • 2020-11-22 04:53

    You will want to set up a custom route to point the URL to the controller that will handle it. Since you are using Ruby on Rails, here is an introduction to using their routing engine.

    In Ruby, as you already know, you will need a regular expression. Here is the one to use:

    # Builds a permalink slug from +str+.
    #
    # Every character that is neither a word character (\w) nor a forward
    # slash is replaced by a space. The result is then stripped of surrounding
    # whitespace, lowercased, and each run of spaces is turned into one dash.
    #
    # Note that forward slashes and underscores are kept as-is.
    def permalink_for(str)
        spaced = str.gsub(/[^\w\/]|[!\(\)\.]+/, ' ')
        lowered = spaced.strip.downcase
        lowered.gsub(/\ +/, '-')
    end
    
    0 讨论(0)
  • 2020-11-22 04:53

    Here's my (slower, but fun to write) version of Jeff's code:

    /// <summary>
    /// Builds a URL slug from <paramref name="title"/> with a single lazily
    /// evaluated LINQ query. Each character is lowercased and passed through
    /// RemapInternationalCharToAscii; letters and digits are kept, any other
    /// character becomes '-' unless the last character written was already '-'.
    /// "sharp" is appended for '#' following 'c' or 'f', and "plus" for '+'.
    /// </summary>
    /// <param name="title">The title to convert.</param>
    /// <returns>
    /// At most 80 generated characters, with trailing '-' characters removed.
    /// </returns>
    public static string URLFriendly(string title)
    {
        // State captured and mutated by the query below while it is enumerated:
        // prevRead    - the last normalized input char that passed both filters
        // prevWritten - the last char emitted to the output
        char? prevRead = null,
            prevWritten = null;
    
        var seq = 
            from c in title
            // NOTE(review): indexing [0] assumes the remapped string is never empty;
            // a char that is itself a non-spacing mark looks like it would be
            // filtered out by the helper and make this throw - confirm.
            let norm = RemapInternationalCharToAscii(char.ToLowerInvariant(c).ToString())[0]
            let keep = char.IsLetterOrDigit(norm)
            // Drop leading characters until the first letter/digit has been read.
            where prevRead.HasValue || keep
            // Kept chars pass through; others become '-' unless a '-' was just
            // written, in which case they become null and are filtered out next.
            let replaced = keep ? norm
                :  prevWritten != '-' ? '-'
                :  (char?)null
            where replaced != null
            // Text to emit for this char: the replacement plus an optional word
            // suffix ("sharp" for '#' after 'c'/'f', "plus" for '+').
            let s = replaced + (prevRead == null ? ""
                : norm == '#' && "cf".Contains(prevRead.Value) ? "sharp"
                : norm == '+' ? "plus"
                : "")
            // These two let clauses exist only for their side effect of updating
            // the captured state; their results (_ and __) are never used.
            let _ = prevRead = norm
            from written in s
            let __ = prevWritten = written
            select written;
    
        // The cap is applied to generated output characters, not input characters.
        const int maxlen = 80;  
        return string.Concat(seq.Take(maxlen)).TrimEnd('-');
    }
    
    /// <summary>
    /// Strips combining marks from <paramref name="text"/>: the text is
    /// decomposed (Form D), every character whose Unicode category is
    /// NonSpacingMark is dropped, and the remainder is recomposed (Form C).
    /// </summary>
    /// <param name="text">The text to strip.</param>
    /// <returns>The text without non-spacing marks, in Form C.</returns>
    public static string RemapInternationalCharToAscii(string text)
    {
        string decomposed = text.Normalize(NormalizationForm.FormD);
        var stripped = new StringBuilder(decomposed.Length);

        foreach (char ch in decomposed)
        {
            // Skip the accent marks that decomposition split off from base letters.
            if (CharUnicodeInfo.GetUnicodeCategory(ch) == UnicodeCategory.NonSpacingMark)
                continue;

            stripped.Append(ch);
        }

        return stripped.ToString().Normalize(NormalizationForm.FormC);
    }
    

    My test string:

    " I love C#, F#, C++, and... Crème brûlée!!! They see me codin'... they hatin'... tryin' to catch me codin' dirty... "

    0 讨论(0)
  • 2020-11-22 04:58

    For good measure, here's the PHP function in WordPress that does it... I'd think that WordPress is one of the more popular platforms that uses fancy links.

        /**
         * Turns a title into a lowercase, dash-separated slug.
         *
         * Relies on remove_accents(), seems_utf8() and utf8_uri_encode(), which
         * are defined outside this block.
         *
         * @param string $title The raw title.
         * @return string The slug, with no leading or trailing dashes.
         */
        function sanitize_title_with_dashes($title) {
                $slug = strip_tags($title);

                // Hide well-formed %XX octets behind a placeholder, drop every
                // remaining '%', then bring the octets back.
                $slug = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug);
                $slug = str_replace('%', '', $slug);
                $slug = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug);

                $slug = remove_accents($slug);

                if (seems_utf8($slug)) {
                        // Multibyte-aware lowercasing is only used when mbstring is loaded.
                        if (function_exists('mb_strtolower')) {
                                $slug = mb_strtolower($slug, 'UTF-8');
                        }
                        $slug = utf8_uri_encode($slug, 200);
                }

                $slug = strtolower($slug);

                // Strip HTML entities, then everything outside the allowed set.
                $slug = preg_replace('/&.+?;/', '', $slug);
                $slug = preg_replace('/[^%a-z0-9 _-]/', '', $slug);

                // Whitespace runs become a dash; dash runs collapse to one.
                $slug = preg_replace('/\s+/', '-', $slug);
                $slug = preg_replace('|-+|', '-', $slug);

                return trim($slug, '-');
        }
    

    This function as well as some of the supporting functions can be found in wp-includes/formatting.php.

    0 讨论(0)
  • 2020-11-22 04:58

    There is a small Ruby on Rails plugin called PermalinkFu that does this. The escape method does the transformation into a string that is suitable for a URL. Have a look at the code; that method is quite simple.

    To remove non-ASCII characters it uses the iconv lib to translate to 'ascii//ignore//translit' from 'utf-8'. Spaces are then turned into dashes, everything is downcased, etc.

    0 讨论(0)
  • 2020-11-22 04:58

    All modern browsers now handle UTF-8 encoding well, so you can use the WebUtility.UrlEncode method. It is like the HttpUtility.UrlEncode used by @giamin, but it also works outside of a web application.

    0 讨论(0)
  • 2020-11-22 04:58

    I ported the code to TypeScript. It can easily be adapted to JavaScript.

    I am adding a .contains method to the String prototype. If you're targeting the latest browsers or ES6, you can use .includes instead.

    // This file is a module (it has an export), so the String interface must be
    // augmented inside `declare global`; a bare `declare interface String` would
    // only create a module-local type and string values would not see `contains`.
    declare global {
        interface String {
            /**
             * Reports whether `check` occurs anywhere in this string.
             * Prefer the standard `String.prototype.includes` where available.
             */
            contains(check: string): boolean;
        }
    }

    // Install the polyfill only when no implementation exists. defineProperty
    // keeps the method non-enumerable, like the built-in string methods.
    if (typeof String.prototype.contains !== 'function') {
        Object.defineProperty(String.prototype, 'contains', {
            value: function (this: string, check: string): boolean {
                return this.indexOf(check, 0) !== -1;
            },
            writable: true,
            configurable: true,
            enumerable: false,
        });
    }
    
    /**
     * Lookup from a lowercase non-ASCII character to the ASCII text it folds to.
     * Built once at module load instead of on every call.
     */
    const ASCII_FOLDS: ReadonlyMap<string, string> = (() => {
        const groups: ReadonlyArray<readonly [string, string]> = [
            ['àåáâäãą', 'a'],
            ['èéêëę', 'e'],
            ['ìíîïı', 'i'],
            ['òóôõöøőð', 'o'],
            ['ùúûüŭů', 'u'],
            ['çćčĉ', 'c'],
            ['żźž', 'z'],
            ['śşšŝ', 's'],
            ['ñń', 'n'],
            ['ýÿ', 'y'],
            ['ğĝ', 'g'],
            ['ř', 'r'],
            ['ł', 'l'],
            ['đ', 'd'],
            ['ß', 'ss'],
            ['þ', 'th'],
            ['ĥ', 'h'],
            ['ĵ', 'j'],
        ];
        const table = new Map<string, string>();
        for (const [chars, ascii] of groups) {
            for (const ch of chars) {
                table.set(ch, ascii);
            }
        }
        return table;
    })();

    /** Characters that are turned into a single dash in the slug. */
    const SLUG_SEPARATORS = ' ,./\\-_=';

    /**
     * Converts a title into a lowercase, dash-separated, ASCII-only URL slug.
     *
     * - ASCII letters and digits are kept (letters lowercased).
     * - Space , . / \ - _ = become a dash; consecutive separators give one dash
     *   and leading separators are dropped.
     * - Known accented characters are folded to ASCII regardless of case
     *   (for example "É" to "e", "Ř" to "r", "ß" to "ss", "Þ"/"þ" to "th").
     * - Every other character is removed.
     *
     * @param title The text to convert; null, undefined and '' yield ''.
     * @returns The slug, never longer than 80 characters and never ending in a dash.
     */
    export function MakeUrlFriendly(title: string): string {
        if (title == null || title === '') {
            return '';
        }

        const maxlen = 80;
        let slug = '';
        let prevdash = false;

        for (const ch of title) {
            // The limit applies to the generated slug, not to the input position.
            if (slug.length >= maxlen) {
                break;
            }

            const code = ch.charCodeAt(0);

            if ((code >= 97 /* a */ && code <= 122 /* z */) || (code >= 48 /* 0 */ && code <= 57 /* 9 */)) {
                slug += ch;
                prevdash = false;
            } else if (code >= 65 /* A */ && code <= 90 /* Z */) {
                slug += ch.toLowerCase();
                prevdash = false;
            } else if (SLUG_SEPARATORS.includes(ch)) {
                // Never start the slug with a dash and never emit two in a row.
                if (!prevdash && slug.length > 0) {
                    slug += '-';
                    prevdash = true;
                }
            } else if (code >= 128) {
                // Look up the lowercased form so upper- and lowercase fold alike.
                const folded = ASCII_FOLDS.get(ch.toLowerCase());
                if (folded !== undefined) {
                    slug += folded;
                    prevdash = false;
                }
            }
        }

        // A two-letter fold ("ss", "th") can overshoot the limit by one character.
        if (slug.length > maxlen) {
            slug = slug.substring(0, maxlen);
        }

        return slug.endsWith('-') ? slug.slice(0, -1) : slug;
    }
    
    0 讨论(0)
提交回复
热议问题