What is the best possible way to check if a string can be represented as a number in Python?
The function I currently have right now is:
def is_numb
You can generalize the exception technique in a useful way by returning more useful values than True and False. For example this function puts quotes round strings but leaves numbers alone. Which is just what I needed for a quick and dirty filter to make some variable definitions for R.
import sys
def fix_quotes(s):
try:
float(s)
return s
except ValueError:
return '"{0}"'.format(s)
for line in sys.stdin:
input = line.split()
print input[0], '<- c(', ','.join(fix_quotes(c) for c in input[1:]), ')'
I wanted to see which method is fastest. Overall the best and most consistent results were given by the check_replace
function. The fastest results were given by the check_exception
function, but only if there was no exception fired - meaning its code is the most efficient, but the overhead of throwing an exception is quite large.
Please note that checking for a successful cast is the only method which is accurate, for example, this works with check_exception
but the other two test functions will return False for a valid float:
huge_number = float('1e+100')
Here is the benchmark code:
import time, re, random, string
ITERATIONS = 10000000
class Timer:
def __enter__(self):
self.start = time.clock()
return self
def __exit__(self, *args):
self.end = time.clock()
self.interval = self.end - self.start
def check_regexp(x):
return re.compile("^\d*\.?\d*$").match(x) is not None
def check_replace(x):
return x.replace('.','',1).isdigit()
def check_exception(s):
try:
float(s)
return True
except ValueError:
return False
to_check = [check_regexp, check_replace, check_exception]
print('preparing data...')
good_numbers = [
str(random.random() / random.random())
for x in range(ITERATIONS)]
bad_numbers = ['.' + x for x in good_numbers]
strings = [
''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(random.randint(1,10)))
for x in range(ITERATIONS)]
print('running test...')
for func in to_check:
with Timer() as t:
for x in good_numbers:
res = func(x)
print('%s with good floats: %s' % (func.__name__, t.interval))
with Timer() as t:
for x in bad_numbers:
res = func(x)
print('%s with bad floats: %s' % (func.__name__, t.interval))
with Timer() as t:
for x in strings:
res = func(x)
print('%s with strings: %s' % (func.__name__, t.interval))
Here are the results with Python 2.7.10 on a 2017 MacBook Pro 13:
check_regexp with good floats: 12.688639
check_regexp with bad floats: 11.624862
check_regexp with strings: 11.349414
check_replace with good floats: 4.419841
check_replace with bad floats: 4.294909
check_replace with strings: 4.086358
check_exception with good floats: 3.276668
check_exception with bad floats: 13.843092
check_exception with strings: 15.786169
Here are the results with Python 3.6.5 on a 2017 MacBook Pro 13:
check_regexp with good floats: 13.472906000000009
check_regexp with bad floats: 12.977665000000016
check_regexp with strings: 12.417542999999995
check_replace with good floats: 6.011045999999993
check_replace with bad floats: 4.849356
check_replace with strings: 4.282754000000011
check_exception with good floats: 6.039081999999979
check_exception with bad floats: 9.322753000000006
check_exception with strings: 9.952595000000002
Here are the results with PyPy 2.7.13 on a 2017 MacBook Pro 13:
check_regexp with good floats: 2.693217
check_regexp with bad floats: 2.744819
check_regexp with strings: 2.532414
check_replace with good floats: 0.604367
check_replace with bad floats: 0.538169
check_replace with strings: 0.598664
check_exception with good floats: 1.944103
check_exception with bad floats: 2.449182
check_exception with strings: 2.200056
I needed to determine if a string cast into basic types (float,int,str,bool). After not finding anything on the internet I created this:
def str_to_type (s):
""" Get possible cast type for a string
Parameters
----------
s : string
Returns
-------
float,int,str,bool : type
Depending on what it can be cast to
"""
try:
f = float(s)
if "." not in s:
return int
return float
except ValueError:
value = s.upper()
if value == "TRUE" or value == "FALSE":
return bool
return type(s)
Example
str_to_type("true") # bool
str_to_type("6.0") # float
str_to_type("6") # int
str_to_type("6abc") # str
str_to_type(u"6abc") # unicode
You can capture the type and use it
s = "6.0"
type_ = str_to_type(s) # float
f = type_(s)
User helper function:
def if_ok(fn, string):
try:
return fn(string)
except Exception as e:
return None
then
if_ok(int, my_str) or if_ok(float, my_str) or if_ok(complex, my_str)
is_number = lambda s: any([if_ok(fn, s) for fn in (int, float, complex)])
This answer provides step by step guide having function with examples to find the string is:
You may use str.isdigit() to check whether given string is positive integer.
Sample Results:
# For digit
>>> '1'.isdigit()
True
>>> '1'.isalpha()
False
str.isdigit()
returns False
if the string is a negative number or a float number. For example:
# returns `False` for float
>>> '123.3'.isdigit()
False
# returns `False` for negative number
>>> '-123'.isdigit()
False
If you want to also check for the negative integers and float, then you may write a custom function to check for it as:
def is_number(n):
try:
float(n) # Type-casting the string to `float`.
# If string is not a valid `float`,
# it'll raise `ValueError` exception
except ValueError:
return False
return True
Sample Run:
>>> is_number('123') # positive integer number
True
>>> is_number('123.4') # positive float number
True
>>> is_number('-123') # negative integer number
True
>>> is_number('-123.4') # negative `float` number
True
>>> is_number('abc') # `False` for "some random" string
False
The above functions will return True
for the "NAN" (Not a number) string because for Python it is valid float representing it is not a number. For example:
>>> is_number('NaN')
True
In order to check whether the number is "NaN", you may use math.isnan() as:
>>> import math
>>> nan_num = float('nan')
>>> math.isnan(nan_num)
True
Or if you don't want to import additional library to check this, then you may simply check it via comparing it with itself using ==
. Python returns False
when nan
float is compared with itself. For example:
# `nan_num` variable is taken from above example
>>> nan_num == nan_num
False
Hence, above function is_number
can be updated to return False
for "NaN"
as:
def is_number(n):
is_number = True
try:
num = float(n)
# check for "nan" floats
is_number = num == num # or use `math.isnan(num)`
except ValueError:
is_number = False
return is_number
Sample Run:
>>> is_number('Nan') # not a number "Nan" string
False
>>> is_number('nan') # not a number string "nan" with all lower cased
False
>>> is_number('123') # positive integer
True
>>> is_number('-123') # negative integer
True
>>> is_number('-1.12') # negative `float`
True
>>> is_number('abc') # "some random" string
False
PS: Each operation for each check depending on the type of number comes with additional overhead. Choose the version of is_number
function which fits your requirement.
So to put it all together, checking for Nan, infinity and complex numbers (it would seem they are specified with j, not i, i.e. 1+2j) it results in:
def is_number(s):
try:
n=str(float(s))
if n == "nan" or n=="inf" or n=="-inf" : return False
except ValueError:
try:
complex(s) # for complex
except ValueError:
return False
return True