C# - Compare String Similarity

后端 未结 2 1721
醉梦人生
醉梦人生 2020-12-02 08:12

What is the best way to compare 2 strings to see how similar they are?

Examples:

My String
My String With Extra Words

Or

         


        
相关标签:
2条回答
  • 2020-12-02 08:57
    /// <summary>
    /// Computes the Levenshtein (edit) distance between two strings using a
    /// full (s.Length + 1) x (t.Length + 1) dynamic-programming table.
    /// </summary>
    static class LevenshteinDistance
    {
        /// <summary>
        /// Returns the minimum number of single-character insertions, deletions
        /// and substitutions needed to turn <paramref name="s"/> into <paramref name="t"/>.
        /// </summary>
        /// <param name="s">Source string; null is treated like an empty string.</param>
        /// <param name="t">Target string; null is treated like an empty string.</param>
        /// <returns>
        /// 0 when both inputs are null or empty, the other string's length when
        /// exactly one of them is null or empty, otherwise the edit distance.
        /// </returns>
        public static int Compute(string s, string t)
        {
            bool sourceIsBlank = string.IsNullOrEmpty(s);
            bool targetIsBlank = string.IsNullOrEmpty(t);

            if (sourceIsBlank)
            {
                return targetIsBlank ? 0 : t.Length;
            }

            if (targetIsBlank)
            {
                return s.Length;
            }

            int rowCount = s.Length;
            int columnCount = t.Length;
            int[,] table = new int[rowCount + 1, columnCount + 1];

            // First column: turning the first `row` characters of s into "" costs `row` deletions.
            for (int row = 0; row <= rowCount; row++)
            {
                table[row, 0] = row;
            }

            // First row: turning "" into the first `column` characters of t costs `column` insertions.
            for (int column = 1; column <= columnCount; column++)
            {
                table[0, column] = column;
            }

            for (int row = 1; row <= rowCount; row++)
            {
                char sourceChar = s[row - 1];

                for (int column = 1; column <= columnCount; column++)
                {
                    int substitutionCost = (t[column - 1] == sourceChar) ? 0 : 1;

                    int viaDeletion = table[row - 1, column] + 1;
                    int viaInsertion = table[row, column - 1] + 1;
                    int viaSubstitution = table[row - 1, column - 1] + substitutionCost;

                    table[row, column] = Math.Min(viaSubstitution, Math.Min(viaDeletion, viaInsertion));
                }
            }

            // Bottom-right cell holds the distance between the complete strings.
            return table[rowCount, columnCount];
        }
    }
    
    0 讨论(0)
  • 2020-12-02 08:59

    If anyone was wondering, here is the C# equivalent of what @FrankSchwieterman posted:

    /// <summary>
    /// Computes the edit distance between <paramref name="s"/> and <paramref name="t"/>
    /// counting single-character insertions, deletions and substitutions, using two
    /// rolling rows of length <c>s.Length + 1</c> instead of a full table.
    /// </summary>
    /// <remarks>
    /// Despite the method name, no adjacent-transposition step is performed, so the
    /// result is the plain Levenshtein distance (for example "ab" vs "ba" yields 2).
    /// The name and results are kept as-is so existing callers are unaffected.
    /// </remarks>
    /// <param name="s">First string; must be neither null nor empty.</param>
    /// <param name="t">Second string; must be neither null nor empty.</param>
    /// <returns>The number of edits needed to turn one string into the other.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="s"/> or <paramref name="t"/> is null or empty.
    /// The exception type is retained for empty input as well, for backward compatibility.
    /// </exception>
    public static int GetDamerauLevenshteinDistance(string s, string t)
    {
        // Pass the parameter NAME (not its value) so ParamName identifies the bad argument.
        if (string.IsNullOrEmpty(s))
        {
            throw new ArgumentNullException(nameof(s), "String Cannot Be Null Or Empty");
        }

        if (string.IsNullOrEmpty(t))
        {
            throw new ArgumentNullException(nameof(t), "String Cannot Be Null Or Empty");
        }

        // Both lengths are at least 1 here because of the guards above.
        int n = s.Length; // length of s
        int m = t.Length; // length of t

        int[] previousRow = new int[n + 1]; // costs for the previous character of t
        int[] currentRow = new int[n + 1];  // costs for the current character of t

        // Distance from the empty prefix of t to each prefix of s is the prefix length.
        for (int i = 0; i <= n; i++)
        {
            previousRow[i] = i;
        }

        for (int j = 1; j <= m; j++)
        {
            char targetChar = t[j - 1]; // jth character of t
            currentRow[0] = j;          // j insertions to build t[0..j) from ""

            for (int i = 1; i <= n; i++)
            {
                int cost = s[i - 1] == targetChar ? 0 : 1;

                // Minimum of: cell to the left + 1, cell above + 1, diagonal + substitution cost.
                currentRow[i] = Math.Min(
                    Math.Min(currentRow[i - 1] + 1, previousRow[i] + 1),
                    previousRow[i - 1] + cost);
            }

            // Swap the rows: the row just filled becomes 'previous' for the next
            // iteration and the old one is reused as scratch space.
            int[] swap = previousRow;
            previousRow = currentRow;
            currentRow = swap;
        }

        // The final swap left the most recently computed costs in previousRow.
        return previousRow[n];
    }
    
    0 讨论(0)
提交回复
热议问题