Natural Sort Order in C#

后端 未结 17 2088
野性不改
野性不改 2020-11-21 04:54

Does anyone have a good resource, or can anyone provide a sample, of a natural order sort in C# for a FileInfo array? I am implementing the IComparer interface in my sorts.

17条回答
  •  陌清茗
    陌清茗 (楼主)
    2020-11-21 05:44

    None of the existing implementations looked great so I wrote my own. The results are almost identical to the sorting used by modern versions of Windows Explorer (Windows 7/8). The only differences I've seen are 1) although Windows used to (e.g. XP) handle numbers of any length, it's now limited to 19 digits - mine is unlimited, 2) Windows gives inconsistent results with certain sets of Unicode digits - mine works fine (although it doesn't numerically compare digits from surrogate pairs; nor does Windows), and 3) mine can't distinguish different types of non-primary sort weights if they occur in different sections (e.g. "e-1é" vs "é1e-" - the sections before and after the number have diacritic and punctuation weight differences).

    /// <summary>
    /// Compares two strings in natural order using the current culture and ignoring case.
    /// </summary>
    /// <param name="strA">The first string to compare.</param>
    /// <param name="strB">The second string to compare.</param>
    /// <returns>
    /// The result of the four-argument overload called with
    /// <see cref="CultureInfo.CurrentCulture"/> and <see cref="CompareOptions.IgnoreCase"/>.
    /// </returns>
    public static int CompareNatural(string strA, string strB) {
        CultureInfo currentCulture = CultureInfo.CurrentCulture;
        const CompareOptions defaultOptions = CompareOptions.IgnoreCase;
        return CompareNatural(strA, strB, currentCulture, defaultOptions);
    }
    
    /// <summary>
    /// Compares two strings in natural order: runs of digits are compared by numeric value
    /// (digit by digit, so the length of a number is not limited), and every other run of
    /// characters is compared with the supplied culture and options.
    /// </summary>
    /// <param name="strA">The first string to compare; may be null.</param>
    /// <param name="strB">The second string to compare; may be null.</param>
    /// <param name="culture">The culture whose <see cref="CompareInfo"/> compares non-digit sections.</param>
    /// <param name="options">The options passed to every <see cref="CompareInfo"/> comparison.</param>
    /// <returns>
    /// A negative value if <paramref name="strA"/> sorts before <paramref name="strB"/>, a positive
    /// value if it sorts after, and zero if the two are considered equal. A null string sorts
    /// before any non-null string, and two null strings are equal.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="culture"/> is null.</exception>
    public static int CompareNatural(string strA, string strB, CultureInfo culture, CompareOptions options) {
        if (culture == null) {
            throw new ArgumentNullException("culture");
        }
        // Follow the String.Compare convention for nulls instead of failing on strA.Length below.
        if (strA == null || strB == null) {
            if (strA == null && strB == null) {
                return 0;
            }
            return strA == null ? -1 : 1;
        }

        CompareInfo cmp = culture.CompareInfo;
        int iA = 0;
        int iB = 0;
        // A "soft" result is a low-priority difference that only decides the outcome when no
        // later section produces a decisive result; a higher weight replaces a lower one.
        int softResult = 0;
        int softResultWeight = 0;
        while (iA < strA.Length && iB < strB.Length) {
            bool isDigitA = Char.IsDigit(strA[iA]);
            bool isDigitB = Char.IsDigit(strB[iB]);
            if (isDigitA != isDigitB) {
                // A digit section meets a text section: let the culture compare the remainders
                return cmp.Compare(strA, iA, strB, iB, options);
            }
            else if (!isDigitA && !isDigitB) {
                // Text sections: find where each one ends (next digit or end of string)
                int jA = iA + 1;
                int jB = iB + 1;
                while (jA < strA.Length && !Char.IsDigit(strA[jA])) jA++;
                while (jB < strB.Length && !Char.IsDigit(strB[jB])) jB++;
                int cmpResult = cmp.Compare(strA, iA, jA - iA, strB, iB, jB - iB, options);
                if (cmpResult != 0) {
                    // Certain strings may be considered different due to "soft" differences that are
                    // ignored if more significant differences follow, e.g. a hyphen only affects the
                    // comparison if no other differences follow
                    string sectionA = strA.Substring(iA, jA - iA);
                    string sectionB = strB.Substring(iB, jB - iB);
                    // Probe: append differing digits in both orders. If the outcome is the same
                    // either way, the section difference outweighs whatever follows it.
                    if (cmp.Compare(sectionA + "1", sectionB + "2", options) ==
                        cmp.Compare(sectionA + "2", sectionB + "1", options))
                    {
                        return cmp.Compare(strA, iA, strB, iB, options);
                    }
                    else if (softResultWeight < 1) {
                        softResult = cmpResult;
                        softResultWeight = 1;
                    }
                }
                iA = jA;
                iB = jB;
            }
            else {
                // Digit sections. Derive the zero character of each digit's own run of ten
                // consecutive digit code points so that only digits from the same set are
                // treated as part of the same number.
                char zeroA = (char)(strA[iA] - (int)Char.GetNumericValue(strA[iA]));
                char zeroB = (char)(strB[iB] - (int)Char.GetNumericValue(strB[iB]));
                int jA = iA;
                int jB = iB;
                // Skip leading zeros; they are only used as a tie-breaker further down
                while (jA < strA.Length && strA[jA] == zeroA) jA++;
                while (jB < strB.Length && strB[jB] == zeroB) jB++;
                int resultIfSameLength = 0;
                do {
                    isDigitA = jA < strA.Length && Char.IsDigit(strA[jA]);
                    isDigitB = jB < strB.Length && Char.IsDigit(strB[jB]);
                    int numA = isDigitA ? (int)Char.GetNumericValue(strA[jA]) : 0;
                    int numB = isDigitB ? (int)Char.GetNumericValue(strB[jB]) : 0;
                    // A digit belonging to a different digit set ends the current number
                    if (isDigitA && (char)(strA[jA] - numA) != zeroA) isDigitA = false;
                    if (isDigitB && (char)(strB[jB] - numB) != zeroB) isDigitB = false;
                    if (isDigitA && isDigitB) {
                        if (numA != numB && resultIfSameLength == 0) {
                            resultIfSameLength = numA < numB ? -1 : 1;
                        }
                        jA++;
                        jB++;
                    }
                }
                while (isDigitA && isDigitB);
                if (isDigitA != isDigitB) {
                    // One number has more digits than the other (ignoring leading zeros) - the longer
                    // number must be larger
                    return isDigitA ? 1 : -1;
                }
                else if (resultIfSameLength != 0) {
                    // Both numbers are the same length (ignoring leading zeros) and at least one of
                    // the digits differed - the first difference determines the result
                    return resultIfSameLength;
                }
                int lA = jA - iA;
                int lB = jB - iB;
                if (lA != lB) {
                    // Both numbers are equivalent but one has more leading zeros
                    return lA > lB ? -1 : 1;
                }
                else if (zeroA != zeroB && softResultWeight < 2) {
                    // Same value written with different digit sets: remember how the culture
                    // orders the first digits, to be used only if nothing else differs
                    softResult = cmp.Compare(strA, iA, 1, strB, iB, 1, options);
                    softResultWeight = 2;
                }
                iA = jA;
                iB = jB;
            }
        }
        if (iA < strA.Length || iB < strB.Length) {
            // One string is a prefix of the other (section-wise): the longer one sorts last
            return iA < strA.Length ? 1 : -1;
        }
        else if (softResult != 0) {
            return softResult;
        }
        return 0;
    }
    

    The signature matches the Comparison<string> delegate:

    // Sort the entries returned for C:\ in place, using CompareNatural as the ordering.
    string[] files = Directory.GetFiles(@"C:\");
    Comparison<string> naturalOrder = CompareNatural;
    Array.Sort(files, naturalOrder);
    

    Here's a wrapper class for use as IComparer<T>:

    /// <summary>
    /// Adapts a <see cref="Comparison{T}"/> delegate to the <see cref="IComparer{T}"/> interface
    /// so that it can be passed to APIs that only accept a comparer object.
    /// </summary>
    /// <typeparam name="T">The type of the objects being compared.</typeparam>
    public class CustomComparer<T> : IComparer<T> {
        private readonly Comparison<T> _comparison;

        /// <summary>Creates a comparer that forwards every comparison to the given delegate.</summary>
        /// <param name="comparison">The delegate that performs the comparison.</param>
        /// <exception cref="ArgumentNullException"><paramref name="comparison"/> is null.</exception>
        public CustomComparer(Comparison<T> comparison) {
            // Fail at construction time rather than on the first Compare call
            if (comparison == null) {
                throw new ArgumentNullException("comparison");
            }
            _comparison = comparison;
        }

        /// <summary>Compares two values by invoking the wrapped delegate.</summary>
        /// <param name="x">The first value.</param>
        /// <param name="y">The second value.</param>
        /// <returns>The value returned by the wrapped delegate for (x, y).</returns>
        public int Compare(T x, T y) {
            return _comparison(x, y);
        }
    }
    

    Example:

    // The type argument must be written out: a constructor call cannot infer it from the
    // CompareNatural method group.
    string[] files = Directory.EnumerateFiles(@"C:\")
        .OrderBy(f => f, new CustomComparer<string>(CompareNatural))
        .ToArray();
    

    Here's a good set of filenames I use for testing:

    // Expands the run-length escapes used in the encoded file names. An escape is a backslash
    // followed by k '0' characters and then a (k + 1)-digit count; it is replaced by that many
    // additional copies of the character preceding the backslash,
    // e.g. "a\1b" -> "aab" and "1\017" -> eighteen '1' characters.
    Func<string, string> expand = (s) => {
        int o;
        while ((o = s.IndexOf('\\')) != -1) {
            int p = o + 1;
            // z is the number of digits in the count: one more than the number of leading zeros
            int z = 1;
            while (s[p] == '0') { z++; p++; }
            int c = Int32.Parse(s.Substring(p, z));
            s = s.Substring(0, o) + new string(s[o - 1], c) + s.Substring(p + z);
        }
        return s;
    };
    // Base64 payload holding the test file names. The code that consumes it decodes the bytes
    // as UTF-8, replaces each '*' with ".txt?", splits on '?', and runs every entry through
    // expand.
    string encodedFileNames =
        "KDEqLW4xMiotbjEzKjAwMDFcMDY2KjAwMlwwMTcqMDA5XDAxNyowMlwwMTcqMDlcMDE3KjEhKjEtISox" +
        "LWEqMS4yNT8xLjI1KjEuNT8xLjUqMSoxXDAxNyoxXDAxOCoxXDAxOSoxXDA2NioxXDA2NyoxYSoyXDAx" +
        "NyoyXDAxOCo5XDAxNyo5XDAxOCo5XDA2Nio9MSphMDAxdGVzdDAxKmEwMDF0ZXN0aW5nYTBcMzEqYTAw" +
        "Mj9hMDAyIGE/YTAwMiBhKmEwMDIqYTAwMmE/YTAwMmEqYTAxdGVzdGluZ2EwMDEqYTAxdnNmcyphMSph" +
        "MWEqYTF6KmEyKmIwMDAzcTYqYjAwM3E0KmIwM3E1KmMtZSpjZCpjZipmIDEqZipnP2cgMT9oLW4qaG8t" +
        "bipJKmljZS1jcmVhbT9pY2VjcmVhbT9pY2VjcmVhbS0/ajBcNDE/ajAwMWE/ajAxP2shKmsnKmstKmsx" +
        "KmthKmxpc3QqbTAwMDNhMDA1YSptMDAzYTAwMDVhKm0wMDNhMDA1Km0wMDNhMDA1YSpuMTIqbjEzKm8t" +
        "bjAxMypvLW4xMipvLW40P28tbjQhP28tbjR6P28tbjlhLWI1Km8tbjlhYjUqb24wMTMqb24xMipvbjQ/" +
        "b240IT9vbjR6P29uOWEtYjUqb245YWI1Km/CrW4wMTMqb8KtbjEyKnAwMCpwMDEqcDAxwr0hKnAwMcK9" +
        "KnAwMcK9YSpwMDHCvcK+KnAwMipwMMK9KnEtbjAxMypxLW4xMipxbjAxMypxbjEyKnItMDAhKnItMDAh" +
        "NSpyLTAwIe+8lSpyLTAwYSpyLe+8kFwxIS01KnIt77yQXDEhLe+8lSpyLe+8kFwxISpyLe+8kFwxITUq" +
        "ci3vvJBcMSHvvJUqci3vvJBcMWEqci3vvJBcMyE1KnIwMCEqcjAwLTUqcjAwLjUqcjAwNSpyMDBhKnIw" +
        "NSpyMDYqcjQqcjUqctmg2aYqctmkKnLZpSpy27Dbtipy27Qqctu1KnLfgN+GKnLfhCpy34UqcuClpuCl" +
        "rCpy4KWqKnLgpasqcuCnpuCnrCpy4KeqKnLgp6sqcuCppuCprCpy4KmqKnLgqasqcuCrpuCrrCpy4Kuq" +
        "KnLgq6sqcuCtpuCtrCpy4K2qKnLgrasqcuCvpuCvrCpy4K+qKnLgr6sqcuCxpuCxrCpy4LGqKnLgsasq" +
        "cuCzpuCzrCpy4LOqKnLgs6sqcuC1puC1rCpy4LWqKnLgtasqcuC5kOC5lipy4LmUKnLguZUqcuC7kOC7" +
        "lipy4LuUKnLgu5UqcuC8oOC8pipy4LykKnLgvKUqcuGBgOGBhipy4YGEKnLhgYUqcuGCkOGClipy4YKU" +
        "KnLhgpUqcuGfoOGfpipy4Z+kKnLhn6UqcuGgkOGglipy4aCUKnLhoJUqcuGlhuGljCpy4aWKKnLhpYsq" +
        "cuGnkOGnlipy4aeUKnLhp5UqcuGtkOGtlipy4a2UKnLhrZUqcuGusOGutipy4a60KnLhrrUqcuGxgOGx" +
        "hipy4bGEKnLhsYUqcuGxkOGxlipy4bGUKnLhsZUqcuqYoFwx6pilKnLqmKDqmKUqcuqYoOqYpipy6pik" +
        "KnLqmKUqcuqjkOqjlipy6qOUKnLqo5UqcuqkgOqkhipy6qSEKnLqpIUqcuqpkOqplipy6qmUKnLqqZUq" +
        "cvCQkqAqcvCQkqUqcvCdn5gqcvCdn50qcu+8kFwxISpy77yQXDEt77yVKnLvvJBcMS7vvJUqcu+8kFwx" +
        "YSpy77yQXDHqmKUqcu+8kFwx77yO77yVKnLvvJBcMe+8lSpy77yQ77yVKnLvvJDvvJYqcu+8lCpy77yV" +
        "KnNpKnPEsSp0ZXN02aIqdGVzdNmi2aAqdGVzdNmjKnVBZS0qdWFlKnViZS0qdUJlKnVjZS0xw6kqdWNl" +
        "McOpLSp1Y2Uxw6kqdWPDqS0xZSp1Y8OpMWUtKnVjw6kxZSp3ZWlhMSp3ZWlhMip3ZWlzczEqd2Vpc3My" +
        "KndlaXoxKndlaXoyKndlacOfMSp3ZWnDnzIqeSBhMyp5IGE0KnknYTMqeSdhNCp5K2EzKnkrYTQqeS1h" +
        "Myp5LWE0KnlhMyp5YTQqej96IDA1MD96IDIxP3ohMjE/ejIwP3oyMj96YTIxP3rCqTIxP1sxKl8xKsKt" +
        "bjEyKsKtbjEzKsSwKg==";
    // Decode the payload, turn each '*' into ".txt?", split on '?', and expand every entry.
    byte[] payloadBytes = Convert.FromBase64String(encodedFileNames);
    string decodedText = Encoding.UTF8.GetString(payloadBytes);
    string delimitedNames = decodedText.Replace("*", ".txt?");
    string[] nameSeparators = new[] { "?" };
    string[] fileNames = delimitedNames
        .Split(nameSeparators, StringSplitOptions.RemoveEmptyEntries)
        .Select(name => expand(name))
        .ToArray();
    

提交回复
热议问题