DotNet Soundex Function

前端 未结 4 887
太阳男子
太阳男子 2021-01-01 06:14

I have a database table with a column that holds the SQL Server SOUNDEX encoding of last name + first name. In my C# program I would like to convert a string using Soundex for use in my query.

相关标签:
4条回答
  • 2021-01-01 06:20

    Based on the answers of Dotnet Services and tigrou, I have corrected the algorithm to reflect the function described on Wikipedia.

    Test cases such as Ashcraft = A226, Tymczak = T522, Pfister = P236 and Honeyman = H555 are now working correctly.

    /// <summary>
    /// Builds a four-character Soundex-style code: the first character of
    /// <paramref name="data"/> (upper-cased) followed by up to three digits,
    /// right-padded with '0'.
    /// </summary>
    /// <param name="data">Text to encode. May be null or empty.</param>
    /// <returns>
    /// A four-character code, or "0000" when <paramref name="data"/> is null or empty.
    /// </returns>
    /// <remarks>
    /// Every character that has no digit of its own (vowels, 'h', 'w', digits,
    /// punctuation) ends a run of identical codes, so "Ashcraft" encodes as A226.
    /// Casing uses the invariant culture so the result does not depend on the
    /// current thread culture (for example Turkish dotted/dotless i).
    /// </remarks>
    public static string Soundex(string data)
    {
        const char NoCode = '\0';
        StringBuilder result = new StringBuilder(4);

        if (!string.IsNullOrEmpty(data))
        {
            char previousCode = NoCode;
            result.Append(data[0]); // keep initial char

            // Start at 0 so the first letter's code can suppress an identical
            // code that immediately follows it ("Pf..." -> P, not P1).
            for (int i = 0; i < data.Length; i++)
            {
                char currentCode;
                switch (char.ToLowerInvariant(data[i]))
                {
                    case 'b': case 'f': case 'p': case 'v':
                        currentCode = '1';
                        break;
                    case 'c': case 'g': case 'j': case 'k':
                    case 'q': case 's': case 'x': case 'z':
                        currentCode = '2';
                        break;
                    case 'd': case 't':
                        currentCode = '3';
                        break;
                    case 'l':
                        currentCode = '4';
                        break;
                    case 'm': case 'n':
                        currentCode = '5';
                        break;
                    case 'r':
                        currentCode = '6';
                        break;
                    default:
                        currentCode = NoCode;
                        break;
                }

                // The first letter contributes itself, never its digit.
                if (i > 0 && currentCode != NoCode && currentCode != previousCode)
                {
                    result.Append(currentCode);
                }

                if (result.Length == 4)
                {
                    break;
                }

                // Always retain the previous code, even when it is "no code",
                // so that any uncoded character separates equal digits.
                previousCode = currentCode;
            }
        }

        if (result.Length < 4)
        {
            result.Append(new string('0', 4 - result.Length));
        }

        return result.ToString().ToUpperInvariant();
    }
    
    0 讨论(0)
  • 2021-01-01 06:24

    Here is an implementation based on the algorithm described on Wikipedia:

        /// <summary>
        /// Computes the American Soundex code of <paramref name="word"/>: the first
        /// letter followed by three digits.
        /// </summary>
        /// <param name="word">Text to encode. Characters other than A-Z (after
        /// invariant upper-casing) are ignored.</param>
        /// <returns>
        /// A four-character code, or "0000" when <paramref name="word"/> is null or
        /// contains no letters.
        /// </returns>
        /// <remarks>
        /// Letters are coded before duplicates are collapsed, so equal codes
        /// separated by a vowel are kept (Tymczak -> T522) while equal codes that are
        /// adjacent or separated only by H/W are merged (Ashcraft -> A261). The first
        /// letter's own code also takes part in the merge (Pfister -> P236).
        /// </remarks>
        private string Soundex(string word)
        {
            const char NoCode = '\0';
            char[] code = { '0', '0', '0', '0' };   // pre-padded result buffer
            int length = 0;                         // number of slots filled so far
            char previous = NoCode;

            if (word != null)
            {
                foreach (char raw in word)
                {
                    char letter = char.ToUpperInvariant(raw);
                    if (letter < 'A' || letter > 'Z')
                    {
                        continue;   // skip digits, punctuation, whitespace, ...
                    }

                    char digit;
                    switch (letter)
                    {
                        case 'B': case 'F': case 'P': case 'V':
                            digit = '1';
                            break;
                        case 'C': case 'G': case 'J': case 'K':
                        case 'Q': case 'S': case 'X': case 'Z':
                            digit = '2';
                            break;
                        case 'D': case 'T':
                            digit = '3';
                            break;
                        case 'L':
                            digit = '4';
                            break;
                        case 'M': case 'N':
                            digit = '5';
                            break;
                        case 'R':
                            digit = '6';
                            break;
                        default:
                            digit = NoCode;
                            break;
                    }

                    if (length == 0)
                    {
                        // The first letter is stored verbatim, but its code is
                        // remembered so a following duplicate is suppressed.
                        code[length++] = letter;
                        previous = digit;
                        continue;
                    }

                    if (letter == 'H' || letter == 'W')
                    {
                        continue;   // H and W do not separate equal codes
                    }

                    if (digit != NoCode && digit != previous)
                    {
                        code[length++] = digit;
                        if (length == code.Length)
                        {
                            break;
                        }
                    }

                    // Vowels reset 'previous' to NoCode, which lets the same
                    // digit be emitted again afterwards.
                    previous = digit;
                }
            }

            return new string(code);
        }
    
    0 讨论(0)
  • 2021-01-01 06:44

    I know this is late, but I also needed something similar (though no database involved), and the only answer isn't accurate (fails for 'Tymczak' and 'Pfister').

    This is what I came up with:

    // Smoke-test driver for Soundex.Generate. Each assertion pairs a known input
    // with the code it must produce.
    class Program
    {
        public static void Main(string[] args)
        {
            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // failure messages report the right value as "expected".

            // Basic codes and zero padding.
            Assert.AreEqual("H000", Soundex.Generate("H"));
            Assert.AreEqual("R163", Soundex.Generate("Robert"));
            Assert.AreEqual("R163", Soundex.Generate("Rupert"));
            Assert.AreEqual("R150", Soundex.Generate("Rubin"));

            // Equal codes separated by H are merged.
            Assert.AreEqual("A261", Soundex.Generate("Ashcraft"));
            Assert.AreEqual("A261", Soundex.Generate("Ashcroft"));

            // Equal codes separated by a vowel are kept; the first letter's code
            // suppresses an adjacent duplicate.
            Assert.AreEqual("T522", Soundex.Generate("Tymczak"));
            Assert.AreEqual("P236", Soundex.Generate("Pfister"));

            Assert.AreEqual("G362", Soundex.Generate("Gutierrez"));
            Assert.AreEqual("J250", Soundex.Generate("Jackson"));
            Assert.AreEqual("V532", Soundex.Generate("VanDeusen"));
            Assert.AreEqual("D250", Soundex.Generate("Deusen"));

            // W carries no code of its own.
            Assert.AreEqual("S630", Soundex.Generate("Sword"));
            Assert.AreEqual("S630", Soundex.Generate("Sord"));

            // Non-letters are ignored.
            Assert.AreEqual("L230", Soundex.Generate("Log-out"));
            Assert.AreEqual("L230", Soundex.Generate("Logout"));

            // Inputs without any letters yield the empty code.
            Assert.AreEqual(Soundex.Empty, Soundex.Generate("123"));
            Assert.AreEqual(Soundex.Empty, Soundex.Generate(""));
            Assert.AreEqual(Soundex.Empty, Soundex.Generate(null));
        }
    }
    
    /// <summary>
    /// American Soundex encoder: maps a phrase to its first letter followed by
    /// three digits describing the consonant sounds that follow.
    /// </summary>
    public static class Soundex
    {
        /// <summary>Code returned when the input contains no letters A-Z.</summary>
        public const string Empty = "0000";

        // Anything that is not an upper-case ASCII letter.
        private static readonly Regex Sanitiser = new Regex(@"[^A-Z]", RegexOptions.Compiled);

        // A digit followed by one or more repeats of itself, each optionally
        // preceded by H/W. The whole run is replaced by a single digit.
        private static readonly Regex CollapseRepeatedNumbers = new Regex(@"(\d)(?:[HW]*\1)+", RegexOptions.Compiled);

        // Letters that carry no digit and are dropped from the final code.
        private static readonly Regex RemoveUncodedLetters = new Regex(@"[AEIOUYHW]", RegexOptions.Compiled);

        /// <summary>
        /// Generates the Soundex code for <paramref name="Phrase"/>.
        /// </summary>
        /// <param name="Phrase">Text to encode; may be null. Characters other than
        /// A-Z (after invariant upper-casing) are discarded.</param>
        /// <returns>A four-character code, or <see cref="Empty"/> when no letters
        /// remain after sanitising.</returns>
        public static string Generate(string Phrase)
        {
            // Invariant casing: a culture-sensitive ToUpper() turns 'i' into a
            // dotted capital under Turkish cultures, which the sanitiser would
            // then throw away.
            string letters = Sanitiser.Replace((Phrase ?? string.Empty).ToUpperInvariant(), string.Empty);

            // Nothing to soundex, return empty
            if (letters.Length == 0)
            {
                return Empty;
            }

            // Convert consonants to their digits; vowels, H, W and Y stay as letters.
            string codes = Numify(letters);

            // Merge runs of the same digit, including runs interrupted by any
            // number of H/W gaps. Vowels are still present and keep runs apart.
            codes = CollapseRepeatedNumbers.Replace(codes, "$1");

            // Index 0 still represents the first letter (possibly merged with
            // same-class followers); the letter itself is emitted instead.
            codes = codes.Substring(1);

            // Drop everything that has no digit.
            codes = RemoveUncodedLetters.Replace(codes, string.Empty);

            // Concatenate, pad and trim to ensure X### format.
            return (letters[0] + codes).PadRight(4, '0').Substring(0, 4);
        }

        // Replaces every coded consonant in the text with its Soundex digit.
        private static string Numify(string letters)
        {
            char[] codes = new char[letters.Length];
            for (int i = 0; i < letters.Length; i++)
            {
                codes[i] = Numify(letters[i]);
            }

            return new string(codes);
        }

        // Returns the Soundex digit for a consonant, or the character itself
        // when it has no digit.
        private static char Numify(char character)
        {
            switch (character)
            {
                case 'B': case 'F': case 'P': case 'V':
                    return '1';
                case 'C': case 'G': case 'J': case 'K': case 'Q': case 'S': case 'X': case 'Z':
                    return '2';
                case 'D': case 'T':
                    return '3';
                case 'L':
                    return '4';
                case 'M': case 'N':
                    return '5';
                case 'R':
                    return '6';
                default:
                    return character;
            }
        }
    }
    
    0 讨论(0)
  • 2021-01-01 06:44

    You could use something like this in C# to mirror SQL Server's SOUNDEX:

    /// <summary>
    /// Computes a four-character Soundex code: the first character of
    /// <paramref name="data"/> (upper-cased) followed by three digits.
    /// </summary>
    /// <param name="data">Text to encode. May be null or empty.</param>
    /// <returns>
    /// A four-character code, or "0000" when <paramref name="data"/> is null or empty.
    /// </returns>
    /// <remarks>
    /// Equal codes are emitted once when adjacent or separated only by characters
    /// that are neither coded consonants nor vowels (h, w, punctuation, digits),
    /// and twice when separated by a vowel (a, e, i, o, u, y). The first letter's
    /// own code takes part in that comparison, so "Pfister" gives P236 and
    /// "Tymczak" gives T522.
    /// NOTE(review): intended to match SQL Server's SOUNDEX; verify against the
    /// target server, whose H/W handling depends on the database compatibility level.
    /// </remarks>
    public static string Soundex(string data)
    {
        const char NoCode = '\0';
        StringBuilder result = new StringBuilder(4);

        if (!string.IsNullOrEmpty(data))
        {
            char previousCode = NoCode;

            // Invariant casing keeps the leading letter stable under cultures
            // with special i/I rules (for example Turkish).
            result.Append(char.ToUpperInvariant(data[0]));

            // Start at 0 so the first letter's code is known, although it is
            // never appended itself.
            for (int i = 0; i < data.Length && result.Length < 4; i++)
            {
                char currentCode;
                switch (char.ToLowerInvariant(data[i]))
                {
                    case 'b': case 'f': case 'p': case 'v':
                        currentCode = '1';
                        break;
                    case 'c': case 'g': case 'j': case 'k':
                    case 'q': case 's': case 'x': case 'z':
                        currentCode = '2';
                        break;
                    case 'd': case 't':
                        currentCode = '3';
                        break;
                    case 'l':
                        currentCode = '4';
                        break;
                    case 'm': case 'n':
                        currentCode = '5';
                        break;
                    case 'r':
                        currentCode = '6';
                        break;
                    case 'a': case 'e': case 'i': case 'o': case 'u': case 'y':
                        // A vowel separates equal codes: forget the previous one.
                        previousCode = NoCode;
                        continue;
                    default:
                        // h, w and non-letters neither emit nor separate.
                        continue;
                }

                if (i > 0 && currentCode != previousCode)
                {
                    result.Append(currentCode);
                }

                previousCode = currentCode;
            }
        }

        if (result.Length < 4)
        {
            result.Append(new string('0', 4 - result.Length));
        }

        return result.ToString();
    }
    
    0 讨论(0)
提交回复
热议问题