Is there a way to convert number words to Integers?

前端 未结 16 2001
北恋
北恋 2020-11-22 06:14

I need to convert one into 1, two into 2 and so on.

Is there a way to do this with a library, a class, or anything else?

相关标签:
16条回答
  • 2020-11-22 06:49

    Make use of the Python package WordToDigits:

    pip install wordtodigits

    It can find numbers present in word form in a sentence and then convert them to the proper numeric format. Also takes care of the decimal part, if present. The word representation of numbers could be anywhere in the passage.

    https://pypi.org/project/wordtodigits/

    0 讨论(0)
  • 2020-11-22 06:52

    Quick and dirty Java port of e_h's C# implementation (above). Note that both return double, not int.

    /**
     * Converts an English number phrase such as "two hundred and five" into
     * its numeric value.
     */
    public class Text2Double {

        /**
         * Parses a phrase made of lower-case English number words separated by
         * spaces and/or hyphens.
         *
         * Note: this is an ordinary method that happens to share the class
         * name (it declares a return type), not a constructor.
         *
         * @param text the phrase to convert, e.g. "twenty-one" or
         *             "three thousand and four"
         * @return the numeric value of the phrase; 0 when the phrase contains
         *         no words
         * @throws IllegalArgumentException if a token is not a known number word
         */
        public double Text2Double(String text) {

            String[] units = new String[]{
                    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
                    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                    "sixteen", "seventeen", "eighteen", "nineteen",
            };

            String[] tens = new String[]{"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};

            String[] scales = new String[]{"hundred", "thousand", "million", "billion", "trillion"};

            Map<String, ScaleIncrementPair> numWord = new LinkedHashMap<>();
            // "and" is a filler word: multiply by 1, add 0.
            numWord.put("and", new ScaleIncrementPair(1, 0));

            for (int i = 0; i < units.length; i++) {
                numWord.put(units[i], new ScaleIncrementPair(1, i));
            }

            // Start at 2: tens[0] and tens[1] are "" placeholders that only keep
            // the index aligned with the value. Registering "" would make every
            // empty token count as 10.
            for (int i = 2; i < tens.length; i++) {
                numWord.put(tens[i], new ScaleIncrementPair(1, i * 10));
            }

            // "hundred" is 10^2; every later scale is 10^(3 * index).
            for (int i = 0; i < scales.length; i++) {
                double scale = (i == 0) ? 100 : Math.pow(10, i * 3);
                numWord.put(scales[i], new ScaleIncrementPair(scale, 0));
            }

            double current = 0;
            double result = 0;

            for (String word : text.split("[ -]+")) {
                // A leading separator (or an empty input) still yields one
                // empty token; ignore it.
                if (word.isEmpty()) {
                    continue;
                }
                ScaleIncrementPair scaleIncrement = numWord.get(word);
                if (scaleIncrement == null) {
                    throw new IllegalArgumentException("Illegal word: " + word);
                }
                current = current * scaleIncrement.scale + scaleIncrement.increment;
                // Scales above "hundred" close the current group: bank it and
                // start a fresh one.
                if (scaleIncrement.scale > 100) {
                    result += current;
                    current = 0;
                }
            }
            return result + current;
        }
    }
    
    /**
     * Mutable pair of numbers attached to a single number word: a
     * multiplicative scale and an additive increment.
     */
    public class ScaleIncrementPair
    {
        /** Factor stored for the word. */
        public double scale;

        /** Amount stored for the word. */
        public int increment;

        /**
         * Creates a pair holding the given values.
         *
         * @param scale     value stored in {@link #scale}
         * @param increment value stored in {@link #increment}
         */
        public ScaleIncrementPair(double scale, int increment)
        {
            this.scale = scale;
            this.increment = increment;
        }
    }
    
    0 讨论(0)
  • 2020-11-22 06:56

    If anyone is interested, I hacked up a version that maintains the rest of the string (though it may have bugs, haven't tested it too much).

    def text2int (textnum, numwords={}):
        """Replace spelled-out numbers in *textnum* with digits, keeping other words.

        Runs of English number words (cardinals such as "fifty five" and
        ordinals such as "twenty first") are collapsed into their decimal
        value; every other whitespace-separated word is copied through
        unchanged. Words are re-joined with single spaces.

        Known limitation: hyphens are treated as word separators everywhere,
        so a hyphenated non-number word comes back with a space instead.

        textnum  -- the sentence to rewrite.
        numwords -- word -> (scale, increment) table. The mutable default is
                    deliberate: it is filled on the first call and reused as a
                    cache afterwards. A caller may pass its own table.

        Returns the rewritten sentence as a string.
        """
        if not numwords:
            units = [
                "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
                "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                "sixteen", "seventeen", "eighteen", "nineteen",
            ]

            tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

            scales = ["hundred", "thousand", "million", "billion", "trillion"]

            numwords["and"] = (1, 0)
            for idx, word in enumerate(units):
                numwords[word] = (1, idx)
            for idx, word in enumerate(tens):
                # The "" entries only pad the list so index * 10 is the value;
                # registering them would turn an empty word into 10.
                if word:
                    numwords[word] = (1, idx * 10)
            for idx, word in enumerate(scales):
                # idx 0 ("hundred") is 10**2, later scales are 10**(3 * idx).
                numwords[word] = (10 ** (idx * 3 or 2), 0)

        ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
        ordinal_endings = [('ieth', 'y'), ('th', '')]

        current = result = 0
        pieces = []
        onnumber = False
        # Number of "and" words consumed since the last real number word. If
        # the number ends right after them they belonged to the sentence, not
        # the number, and are emitted again.
        trailing_ands = 0
        for word in textnum.replace('-', ' ').split():
            # Strip ordinal suffixes on a copy so that a word which turns out
            # not to be a number is emitted exactly as written.
            candidate = word
            if word in ordinal_words:
                pair = (1, ordinal_words[word])
            else:
                for ending, replacement in ordinal_endings:
                    if candidate.endswith(ending):
                        candidate = candidate[:-len(ending)] + replacement
                pair = numwords.get(candidate) if candidate else None
                # "and" only counts as part of a number when one is in progress.
                if candidate == "and" and not onnumber:
                    pair = None

            if pair is None:
                if onnumber:
                    pieces.append(str(result + current))
                    pieces.extend(["and"] * trailing_ands)
                pieces.append(word)
                result = current = 0
                onnumber = False
                trailing_ands = 0
            else:
                scale, increment = pair
                # A bare scale word ("hundred") means one of that scale.
                if scale > 1:
                    current = max(1, current)
                current = current * scale + increment
                # Scales above "hundred" close the current group.
                if scale > 100:
                    result += current
                    current = 0
                onnumber = True
                trailing_ands = trailing_ands + 1 if candidate == "and" else 0

        if onnumber:
            pieces.append(str(result + current))
            pieces.extend(["and"] * trailing_ands)

        return " ".join(pieces)
    

    Example:

     >>> text2int("I want fifty five hot dogs for two hundred dollars.")
     I want 55 hot dogs for 200 dollars.
    

    There could be issues if you have, say, "$200". But, this was really rough.

    0 讨论(0)
  • 2020-11-22 06:58

    This is the C# implementation of the code in the first answer:

    /// <summary>
    /// Converts an English number phrase such as "two hundred and five" into
    /// its numeric value.
    /// </summary>
    /// <param name="text">
    /// Lower-case number words separated by spaces, hyphens or em dashes.
    /// </param>
    /// <returns>
    /// The numeric value of the phrase; 0 when the phrase contains no words.
    /// </returns>
    /// <exception cref="KeyNotFoundException">
    /// A token is not a known number word.
    /// </exception>
    public static double ConvertTextToNumber(string text)
    {
        string[] units = new string[] {
            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
            "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
            "sixteen", "seventeen", "eighteen", "nineteen",
        };

        string[] tens = new string[] {"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};

        string[] scales = new string[] { "hundred", "thousand", "million", "billion", "trillion" };

        Dictionary<string, ScaleIncrementPair> numWord = new Dictionary<string, ScaleIncrementPair>();
        // "and" is a filler word: multiply by 1, add 0.
        numWord.Add("and", new ScaleIncrementPair(1, 0));
        for (int i = 0; i < units.Length; i++)
        {
            numWord.Add(units[i], new ScaleIncrementPair(1, i));
        }

        // Start at 2: tens[0] and tens[1] are "" placeholders that only keep
        // the index aligned with the value. Registering "" would make every
        // empty token count as 10.
        for (int i = 2; i < tens.Length; i++)
        {
            numWord.Add(tens[i], new ScaleIncrementPair(1, i * 10));
        }

        // "hundred" is 10^2; every later scale is 10^(3 * index).
        for (int i = 0; i < scales.Length; i++)
        {
            double scale = (i == 0) ? 100 : Math.Pow(10, (i * 3));
            numWord.Add(scales[i], new ScaleIncrementPair(scale, 0));
        }

        double current = 0;
        double result = 0;

        // RemoveEmptyEntries drops the empty tokens produced by doubled,
        // leading or trailing separators.
        string[] words = text.Split(new char[] { ' ', '-', '—' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var word in words)
        {
            ScaleIncrementPair scaleIncrement;
            if (!numWord.TryGetValue(word, out scaleIncrement))
            {
                // Same exception type the indexer would raise, but naming the word.
                throw new KeyNotFoundException("Illegal word: " + word);
            }
            current = current * scaleIncrement.scale + scaleIncrement.increment;
            // Scales above "hundred" close the current group: bank it and
            // start a fresh one.
            if (scaleIncrement.scale > 100)
            {
                result += current;
                current = 0;
            }
        }
        return result + current;
    }
    
    
    /// <summary>
    /// Pair of numbers attached to a single number word: a multiplicative
    /// scale and an additive increment.
    /// </summary>
    public struct ScaleIncrementPair
    {
        /// <summary>Factor stored for the word.</summary>
        public double scale;

        /// <summary>Amount stored for the word.</summary>
        public int increment;

        /// <summary>Creates a pair holding the given values.</summary>
        /// <param name="s">Value stored in <see cref="scale"/>.</param>
        /// <param name="i">Value stored in <see cref="increment"/>.</param>
        public ScaleIncrementPair(double s, int i)
        {
            this.scale = s;
            this.increment = i;
        }
    }
    
    0 讨论(0)
  • 2020-11-22 06:59

    The majority of this code is to set up the numwords dict, which is only done on the first call.

    def text2int(textnum, numwords={}):
        """Convert an English number phrase to an integer.

        textnum  -- lower-case number words separated by whitespace and/or
                    hyphens, e.g. "twenty-one" or "three hundred and five".
        numwords -- word -> (scale, increment) table. The mutable default is
                    deliberate: it is filled on the first call and reused as a
                    cache afterwards. A caller may pass its own table.

        Returns the integer value of the phrase (0 for an empty phrase).
        Raises ValueError if a word is not a known number word.
        """
        if not numwords:
            units = [
                "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
                "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                "sixteen", "seventeen", "eighteen", "nineteen",
            ]

            tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

            scales = ["hundred", "thousand", "million", "billion", "trillion"]

            numwords["and"] = (1, 0)
            for idx, word in enumerate(units):
                numwords[word] = (1, idx)
            for idx, word in enumerate(tens):
                # The "" entries only pad the list so index * 10 is the value;
                # they must not become number words themselves.
                if word:
                    numwords[word] = (1, idx * 10)
            for idx, word in enumerate(scales):
                # idx 0 ("hundred") is 10**2, later scales are 10**(3 * idx).
                numwords[word] = (10 ** (idx * 3 or 2), 0)

        current = result = 0
        # Hyphens join compound numbers ("twenty-one"); treat them as spaces.
        for word in textnum.replace("-", " ").split():
            if word not in numwords:
                raise ValueError("Illegal word: " + word)

            scale, increment = numwords[word]
            # A bare scale word ("hundred") means one of that scale, not zero.
            if scale > 1:
                current = max(1, current)
            current = current * scale + increment
            # Scales above "hundred" close the current group: bank it and
            # start a fresh one.
            if scale > 100:
                result += current
                current = 0

        return result + current
    
    # Demo call; the function form of print runs on both Python 2 and Python 3.
    print(text2int("seven billion one hundred million thirty one thousand three hundred thirty seven"))
    # 7100031337
    
    0 讨论(0)
  • 2020-11-22 06:59

    I made a change so that text2int(scale) returns the correct conversion, e.g. text2int("hundred") => 100.

    import re
    
    # Module-level cache of word -> (scale, increment); filled on the first
    # call to text2int().
    numwords = {}


    def text2int(textnum):
        """Convert an English number phrase (cardinal or ordinal) to an integer.

        textnum -- lower-case number words separated by whitespace and/or
                   hyphens, e.g. "twenty-first" or "one hundred and five".
                   A bare scale word counts as one of that scale
                   ("hundred" -> 100).

        Returns the integer value of the phrase (0 when it contains no words).
        Raises ValueError if a token is not a known number word.
        """
        if not numwords:

            units = [ "zero", "one", "two", "three", "four", "five", "six",
                    "seven", "eight", "nine", "ten", "eleven", "twelve",
                    "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
                    "eighteen", "nineteen"]

            tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty",
                    "seventy", "eighty", "ninety"]

            scales = ["hundred", "thousand", "million", "billion", "trillion",
                    'quadrillion', 'quintillion', 'sextillion', 'septillion',
                    'octillion', 'nonillion', 'decillion' ]

            numwords["and"] = (1, 0)
            for idx, word in enumerate(units):
                numwords[word] = (1, idx)
            for idx, word in enumerate(tens):
                # The "" entries only pad the list so index * 10 is the value;
                # registering them would make every empty token count as 10.
                if word:
                    numwords[word] = (1, idx * 10)
            for idx, word in enumerate(scales):
                # idx 0 ("hundred") is 10**2, later scales are 10**(3 * idx).
                numwords[word] = (10 ** (idx * 3 or 2), 0)
            # Earlier versions spelled this scale "sexillion"; keep accepting it.
            numwords["sexillion"] = numwords["sextillion"]

        ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5,
                'eighth':8, 'ninth':9, 'twelfth':12}
        ordinal_endings = [('ieth', 'y'), ('th', '')]
        current = result = 0
        for token in re.split(r"[\s-]+", textnum):
            # re.split yields '' for leading/trailing separators and for an
            # empty input; those are not words.
            if not token:
                continue

            if token in ordinal_words:
                scale, increment = (1, ordinal_words[token])
            else:
                # Reduce regular ordinals to their cardinal form
                # ("twentieth" -> "twenty", "tenth" -> "ten").
                word = token
                for ending, replacement in ordinal_endings:
                    if word.endswith(ending):
                        word = "%s%s" % (word[:-len(ending)], replacement)

                if not word or word not in numwords:
                    # Report what the caller actually wrote, not the stripped form.
                    raise ValueError("Illegal word: " + token)

                scale, increment = numwords[word]

            # A bare scale word ("hundred") means one of that scale, not zero.
            if scale > 1:
                current = max(1, current)

            current = current * scale + increment
            # Scales above "hundred" close the current group: bank it and
            # start a fresh one.
            if scale > 100:
                result += current
                current = 0

        return result + current
    
    0 讨论(0)
提交回复
热议问题