Is there a way to convert number words to integers? [python]

Is there a way to convert number words to Integers?


The majority of this code is to set up the numwords dict, which is only done on the first call.

def text2int(textnum, numwords={}):
    """Convert a number spelled out in English words to an integer.

    Words may be separated by whitespace or hyphens ("twenty-one") and are
    matched case-insensitively. The word "and" is accepted and contributes
    nothing to the value.

    Args:
        textnum: The number in words, e.g. "three hundred thirty seven".
        numwords: Mapping of word -> (scale, increment). When empty it is
            filled with the built-in English table.

    Returns:
        The integer value; 0 for an empty or whitespace-only string.

    Raises:
        ValueError: If a word is not present in ``numwords``.
    """
    # NOTE: the mutable default argument is deliberate here. It acts as a
    # cache so the lookup table is only built on the first call.
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        tens = [
            "twenty", "thirty", "forty", "fifty",
            "sixty", "seventy", "eighty", "ninety",
        ]
        thousands = ["thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        # "twenty" is the 2nd multiple of ten, hence the start offset.
        for idx, word in enumerate(tens, 2):
            numwords[word] = (1, idx * 10)
        numwords["hundred"] = (100, 0)
        for idx, word in enumerate(thousands, 1):
            numwords[word] = (10 ** (idx * 3), 0)

    # `current` accumulates the group being read (at most hundreds);
    # `result` collects the groups already closed by thousand/million/...
    current = result = 0
    for word in textnum.replace("-", " ").split():
        # Prefer an exact match so caller-supplied tables with their own
        # casing keep working; fall back to the lower-cased word.
        key = word if word in numwords else word.lower()
        if key not in numwords:
            raise ValueError("Illegal word: " + word)

        scale, increment = numwords[key]
        current = current * scale + increment
        if scale > 100:
            # A thousand-or-larger scale closes the current group.
            result += current
            current = 0

    return result + current


print(text2int("seven billion one hundred million thirty one thousand three hundred thirty seven"))
# 7100031337


I have just released a Python module on PyPI called word2number for this exact purpose: https://github.com/akshaynagpal/w2n

Install it using:

pip install word2number

Make sure your pip is updated to the latest version.

Usage:

# Usage example for the third-party `word2number` package.
from word2number import w2n

print(w2n.word_to_num("two million three thousand nine hundred and eighty four"))
# 2003984


If anyone is interested, I hacked up a version that maintains the rest of the string (though it may have bugs, haven't tested it too much).

def text2int(textnum, numwords={}):
    """Replace spelled-out English numbers inside a string with digits.

    Words that are not part of a number are kept as they are. Runs of
    whitespace are collapsed to single spaces because the text is split
    into words and re-joined. Ordinals ("fifth", "twentieth") are replaced
    by their cardinal value.

    Args:
        textnum: Free text that may contain numbers written as words.
        numwords: Mapping of word -> (scale, increment). When empty it is
            filled with the built-in English table.

    Returns:
        The text with each run of number words replaced by its decimal
        digits, e.g. "fifty five hot dogs" -> "55 hot dogs".
    """
    # NOTE: the mutable default argument is deliberate here. It acts as a
    # cache so the lookup table is only built on the first call.
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
            "nineteen",
        ]
        tens = [
            "twenty", "thirty", "forty", "fifty",
            "sixty", "seventy", "eighty", "ninety",
        ]
        thousands = ["thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        # "twenty" is the 2nd multiple of ten, hence the start offset.
        for idx, word in enumerate(tens, 2):
            numwords[word] = (1, idx * 10)
        numwords["hundred"] = (100, 0)
        for idx, word in enumerate(thousands, 1):
            numwords[word] = (10 ** (idx * 3), 0)

    # Irregular ordinals that cannot be derived by stripping a suffix.
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    # Regular ordinals: "twentieth" -> "twenty", "fourth" -> "four".
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    def lookup(word):
        """Return (scale, increment) for a number word, else None."""
        if word in ordinal_words:
            return (1, ordinal_words[word])
        if word in numwords:
            return numwords[word]
        for ending, replacement in ordinal_endings:
            if word.endswith(ending):
                stem = word[:-len(ending)] + replacement
                # Only accept the stripped form when it really is a number
                # word; otherwise "month" would be mangled into "mon".
                if stem in numwords:
                    return numwords[stem]
        return None

    # Split hyphenated tokens only when every part is a number word, so
    # "twenty-one" becomes two words while "well-known" is left intact.
    words = []
    for token in textnum.split():
        parts = token.split('-')
        if len(parts) > 1 and all(lookup(part) is not None for part in parts):
            words.extend(parts)
        else:
            words.append(token)

    pieces = []            # output words, joined with single spaces
    current = result = 0   # open group / sum of groups already closed
    onnumber = False       # True while inside a run of number words

    for idx, word in enumerate(words):
        value = lookup(word)

        if word == "and":
            # "and" belongs to a number only when it sits between two
            # number words ("two hundred and five"); otherwise it is text.
            following = words[idx + 1] if idx + 1 < len(words) else None
            joins_number = (
                onnumber
                and following is not None
                and following != "and"
                and lookup(following) is not None
            )
            if not joins_number:
                value = None

        if value is None:
            if onnumber:
                # The number run just ended: emit its value first.
                pieces.append(str(result + current))
            pieces.append(word)
            result = current = 0
            onnumber = False
            continue

        scale, increment = value
        current = current * scale + increment
        if scale > 100:
            # A thousand-or-larger scale closes the current group.
            result += current
            current = 0
        onnumber = True

    if onnumber:
        pieces.append(str(result + current))

    return " ".join(pieces)

Example:

 >>> text2int("I want fifty five hot dogs for two hundred dollars.")
 I want 55 hot dogs for 200 dollars.

There could be issues if you have, say, "$200". But, this was really rough.