I need to convert a string in the format "1.234.345,00" to the float value 1234345.00.
One way is to use repeated `str.replace` calls.
If you pop open the source code for locale, you can see that there is a variable called _override_localeconv
(which seems to be for testing purposes).
# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}
Trying the following does seem to override the dictionary without changing the entire locale, though it probably has some unintended consequences, especially since changing locales isn't threadsafe. Be careful!
import locale

# NOTE: _override_localeconv is a private hook of the locale module (its own
# source comment says it is meant for testing). The entries set here are not
# reset by this snippet, so they keep affecting locale.localeconv() afterwards.
locale._override_localeconv["thousands_sep"] = "."
locale._override_localeconv["decimal_point"] = ","

# print() is called as a function: the Python 2 statement form
# (`print locale.atof(...)`) is a SyntaxError on Python 3.
print(locale.atof('123.456,78'))
Try it online!
Here's something, using Babel, that works for me.
First you feed it some test data, with your expectations and it builds a dictionary of separator to locale alias that fits.
Then you can convert from that point on.
import string
from decimal import Decimal
from babel.numbers import parse_decimal, NumberFormatError
from babel.core import UnknownLocaleError
import locale
# (input string, expected Decimal) pairs used to learn which locale alias
# matches each separator pattern: "." thousands / "," decimal, the reverse,
# and no separator at all.
traindata = [
    ("1.234.345,00", Decimal("1234345.00")),
    ("1,234,345.00", Decimal("1234345.00")),
    ("345", Decimal("345.00")),
]

# Full data set to convert: the training pairs followed by extra inputs that
# reuse the same separator patterns.
data = traindata + [
    ("345,00", Decimal("345.00")),
    ("345.00", Decimal("345.00")),
    ("746", Decimal("746.00")),
]
def findseps(input_):
    """Return a key describing the separators used in the number string *input_*.

    The last non-digit character is taken to be the decimal separator. The
    thousands separator is then assumed to be "." when the decimal separator
    is ",", and "," otherwise. The key is ``thousands + decimal`` (for
    example ``".,"`` for ``"1.234.345,00"``), or ``""`` when the string
    contains no separator at all.

    Sign characters ("+" and "-") are ignored so that a signed number maps to
    the same key as its unsigned form, instead of the sign being mistaken for
    a separator.

    The input needs either no separator or at least a decimal separator: a
    string with only a thousands separator (e.g. ``"1,234"``) is read as if
    that character were the decimal separator.
    """
    ignored = string.digits + "+-"
    non_digits = [c for c in input_ if c not in ignored]
    if not non_digits:
        return ""

    decimal_sep = non_digits[-1]
    # If the decimal separator is "," the thousands separator must be the
    # other common one ("."); in every other case assume ",".
    thousands_sep = "." if decimal_sep == "," else ","
    return thousands_sep + decimal_sep
def setup(input_, exp, lookup):
    """Learn which locale alias parses *input_* to the expected value *exp*.

    The separator key of *input_* (see ``findseps``) is looked up in
    *lookup*; if it is already present nothing happens. Otherwise every alias
    in ``locale.locale_alias`` is tried with ``parse_decimal`` and the first
    one whose result equals *exp* is stored as ``lookup[key]``. If no alias
    matches, *lookup* is left unchanged.

    Returns None; *lookup* is mutated in place.
    """
    key = findseps(input_)
    if key in lookup:
        # A locale for this separator pattern is already known.
        return

    for alias in locale.locale_alias:
        try:
            parsed = parse_decimal(input_, locale=alias)
        except (NumberFormatError, UnknownLocaleError, ValueError):
            # This alias cannot be used to parse the sample; try the next.
            # Any other exception propagates to the caller.
            continue
        if parsed == exp:
            lookup[key] = alias
            return
def convert(input_, lookup):
    """Parse *input_* with the locale that *lookup* maps its separators to.

    The locale chosen for the call is also published on the function object
    as ``convert.locale_`` (``None`` when the separator key is not in
    *lookup*).

    Returns the value produced by ``parse_decimal`` on success. Failures are
    returned rather than raised: an ``"unexpected seps:..."`` string when the
    separator key is unknown, or the exception object itself when parsing
    fails.
    """
    key = findseps(input_)
    try:
        chosen = lookup[key]
    except KeyError:
        convert.locale_ = None
        return "unexpected seps:%s" % key

    convert.locale_ = chosen
    try:
        return parse_decimal(input_, locale=chosen)
    except Exception as err:
        # Best-effort contract: hand the error back to the caller as a value.
        return err
# Maps a separator key (as returned by findseps) to a locale alias.
lookup = {}

# Training pass: let setup() pick a locale alias for every separator pattern
# that appears in the training pairs.
for input_, exp in traindata:
    setup(input_, exp, lookup)

# Conversion pass: with the lookup filled, convert every sample and report
# whether the result equals the expected value.
print(data)
for input_, exp in data:
    got = convert(input_, lookup)
    msg = "%s => %s with local:%s:" % (input_, got, convert.locale_)
    print(("\n success : " if exp == got else "\n failure : ") + msg)

print(lookup)
output:
[('1.234.345,00', Decimal('1234345.00')), ('1,234,345.00', Decimal('1234345.00')), ('345', Decimal('345.00')), ('345,00', Decimal('345.00')), ('345.00', Decimal('345.00')), ('746', Decimal('746.00'))]
success : 1.234.345,00 => 1234345.00 with local:is_is:
success : 1,234,345.00 => 1234345.00 with local:ko_kr.euc:
success : 345 => 345 with local:ko_kr.euc:
success : 345,00 => 345.00 with local:is_is:
success : 345.00 => 345.00 with local:ko_kr.euc:
success : 746 => 746 with local:ko_kr.euc:
{',.': 'ko_kr.euc', '': 'ko_kr.euc', '.,': 'is_is'}
There are two parts in your question:
You can use the amazing Babel library for both.
One locale associated with a `.` thousands separator and a `,` decimal separator is `ger_de`, for German.
To parse it, simply use
>>> from babel.numbers import parse_decimal
>>> parse_decimal('1.234.345,00', locale='ger_de')
Decimal('1234345.00')
Use this routine which checks the string to parse against the expected value for all locales, and returns the ones that are compatible:
import locale
from babel.numbers import parse_decimal
from decimal import Decimal
def get_compatible_locales(string_to_parse, expected_decimal):
    """Return the locale aliases under which *string_to_parse* parses correctly.

    Every alias in ``locale.locale_alias`` is tried with ``parse_decimal``;
    an alias is kept when the parsed value equals *expected_decimal*. Aliases
    for which parsing (or the comparison) raises any ``Exception`` are
    skipped. The result preserves the iteration order of
    ``locale.locale_alias``.
    """

    def _is_compatible(alias):
        # Any failure for this alias simply means "not compatible".
        try:
            return bool(
                parse_decimal(string_to_parse, locale=alias) == expected_decimal
            )
        except Exception:
            return False

    return [alias for alias in locale.locale_alias if _is_compatible(alias)]
For your example:
>>> print(get_compatible_locales('1.234.345,00', Decimal('1234345')))
['ar_dz', 'ar_lb', 'ar_ly', 'ar_ma', 'ar_tn', 'ast_es', 'az', 'az_az', 'az_az.iso88599e', 'bs', 'bs_ba', 'ca', 'ca_ad', 'ca_es', 'ca_es@valencia', 'ca_fr', 'ca_it', 'da', 'da_dk', 'de', 'de_at', 'de_be', 'de_de', 'de_lu', 'el', 'el_cy', 'el_gr', 'el_gr@euro', 'en_be', 'en_dk', 'es', 'es_ar', 'es_bo', 'es_cl', 'es_co', 'es_ec', 'es_es', 'es_py', 'es_uy', 'es_ve', 'eu', 'eu_es', 'fo', 'fo_fo', 'fr_lu', 'fy_nl', 'ger_de', 'gl', 'gl_es', 'hr', 'hr_hr', 'hsb_de', 'id', 'id_id', 'in', 'in_id', 'is', 'is_is', 'it', 'it_it', 'kl', 'kl_gl', 'km_kh', 'lb_lu', 'lo', 'lo_la', 'lo_la.cp1133', 'lo_la.ibmcp1133', 'lo_la.mulelao1', 'mk', 'mk_mk', 'nl', 'nl_aw', 'nl_be', 'nl_nl', 'ps_af', 'pt', 'pt_br', 'ro', 'ro_ro', 'rw', 'rw_rw', 'sl', 'sl_si', 'sr', 'sr@cyrillic', 'sr@latn', 'sr_cs', 'sr_cs.iso88592@latn', 'sr_cs@latn', 'sr_me', 'sr_rs', 'sr_rs@latn', 'sr_yu', 'sr_yu.cp1251@cyrillic', 'sr_yu.iso88592', 'sr_yu.iso88595', 'sr_yu.iso88595@cyrillic', 'sr_yu.microsoftcp1251@cyrillic', 'sr_yu.utf8', 'sr_yu.utf8@cyrillic', 'sr_yu@cyrillic', 'tr', 'tr_cy', 'tr_tr', 'vi', 'vi_vn', 'vi_vn.tcvn', 'vi_vn.tcvn5712', 'vi_vn.viscii', 'vi_vn.viscii111', 'wo_sn']
Use the following routine, where `my_locale` should be your own locale:
from babel import Locale
def get_display_name(alias, my_locale='en_US'):
    """Return the display name of the locale *alias*.

    *alias* is parsed with ``Locale.parse`` and its display name is requested
    for *my_locale* (``'en_US'`` by default), i.e. the locale in which the
    name should be written.
    """
    return Locale.parse(alias).get_display_name(my_locale)
You can then use it this way:
>>> print({loc: get_display_name(loc) for loc in locales})
{'ar_dz': 'Arabic (Algeria)', 'ar_lb': 'Arabic (Lebanon)', 'ar_ly': 'Arabic (Libya)', 'ar_ma': 'Arabic (Morocco)', 'ar_tn': 'Arabic (Tunisia)', 'ast_es': 'Asturian (Spain)', 'az': 'Azerbaijani', 'az_az': 'Azerbaijani (Latin, Azerbaijan)', 'az_az.iso88599e': 'Azerbaijani (Latin, Azerbaijan)', 'bs': 'Bosnian', 'bs_ba': 'Bosnian (Latin, Bosnia & Herzegovina)', 'ca': 'Catalan', 'ca_ad': 'Catalan (Andorra)', 'ca_es': 'Catalan (Spain)', 'ca_es@valencia': 'Catalan (Spain)', 'ca_fr': 'Catalan (France)', 'ca_it': 'Catalan (Italy)', 'da': 'Danish', 'da_dk': 'Danish (Denmark)', 'de': 'German', 'de_at': 'German (Austria)', 'de_be': 'German (Belgium)', 'de_de': 'German (Germany)', 'de_lu': 'German (Luxembourg)', 'el': 'Greek', 'el_cy': 'Greek (Cyprus)', 'el_gr': 'Greek (Greece)', 'el_gr@euro': 'Greek (Greece)', 'en_be': 'English (Belgium)', 'en_dk': 'English (Denmark)', 'es': 'Spanish', 'es_ar': 'Spanish (Argentina)', 'es_bo': 'Spanish (Bolivia)', 'es_cl': 'Spanish (Chile)', 'es_co': 'Spanish (Colombia)', 'es_ec': 'Spanish (Ecuador)', 'es_es': 'Spanish (Spain)', 'es_py': 'Spanish (Paraguay)', 'es_uy': 'Spanish (Uruguay)', 'es_ve': 'Spanish (Venezuela)', 'eu': 'Basque', 'eu_es': 'Basque (Spain)', 'fo': 'Faroese', 'fo_fo': 'Faroese (Faroe Islands)', 'fr_lu': 'French (Luxembourg)', 'fy_nl': 'Western Frisian (Netherlands)', 'ger_de': 'German (Germany)', 'gl': 'Galician', 'gl_es': 'Galician (Spain)', 'hr': 'Croatian', 'hr_hr': 'Croatian (Croatia)', 'hsb_de': 'Upper Sorbian (Germany)', 'id': 'Indonesian', 'id_id': 'Indonesian (Indonesia)', 'in': 'Indonesian (Indonesia)', 'in_id': 'Indonesian (Indonesia)', 'is': 'Icelandic', 'is_is': 'Icelandic (Iceland)', 'it': 'Italian', 'it_it': 'Italian (Italy)', 'kl': 'Kalaallisut', 'kl_gl': 'Kalaallisut (Greenland)', 'km_kh': 'Khmer (Cambodia)', 'lb_lu': 'Luxembourgish (Luxembourg)', 'lo': 'Lao', 'lo_la': 'Lao (Laos)', 'lo_la.cp1133': 'Lao (Laos)', 'lo_la.ibmcp1133': 'Lao (Laos)', 'lo_la.mulelao1': 'Lao (Laos)', 'mk': 'Macedonian', 'mk_mk': 
'Macedonian (Macedonia)', 'nl': 'Dutch', 'nl_aw': 'Dutch (Aruba)', 'nl_be': 'Dutch (Belgium)', 'nl_nl': 'Dutch (Netherlands)', 'ps_af': 'Pashto (Afghanistan)', 'pt': 'Portuguese', 'pt_br': 'Portuguese (Brazil)', 'ro': 'Romanian', 'ro_ro': 'Romanian (Romania)', 'rw': 'Kinyarwanda', 'rw_rw': 'Kinyarwanda (Rwanda)', 'sl': 'Slovenian', 'sl_si': 'Slovenian (Slovenia)', 'sr': 'Serbian', 'sr@cyrillic': 'Serbian', 'sr@latn': 'Serbian', 'sr_cs': 'Serbian (Cyrillic, Serbia)', 'sr_cs.iso88592@latn': 'Serbian (Cyrillic, Serbia)', 'sr_cs@latn': 'Serbian (Cyrillic, Serbia)', 'sr_me': 'Serbian (Latin, Montenegro)', 'sr_rs': 'Serbian (Cyrillic, Serbia)', 'sr_rs@latn': 'Serbian (Cyrillic, Serbia)', 'sr_yu': 'Serbian (Cyrillic, Serbia)', 'sr_yu.cp1251@cyrillic': 'Serbian (Cyrillic, Serbia)', 'sr_yu.iso88592': 'Serbian (Cyrillic, Serbia)', 'sr_yu.iso88595': 'Serbian (Cyrillic, Serbia)', 'sr_yu.iso88595@cyrillic': 'Serbian (Cyrillic, Serbia)', 'sr_yu.microsoftcp1251@cyrillic': 'Serbian (Cyrillic, Serbia)', 'sr_yu.utf8': 'Serbian (Cyrillic, Serbia)', 'sr_yu.utf8@cyrillic': 'Serbian (Cyrillic, Serbia)', 'sr_yu@cyrillic': 'Serbian (Cyrillic, Serbia)', 'tr': 'Turkish', 'tr_cy': 'Turkish (Cyprus)', 'tr_tr': 'Turkish (Turkey)', 'vi': 'Vietnamese', 'vi_vn': 'Vietnamese (Vietnam)', 'vi_vn.tcvn': 'Vietnamese (Vietnam)', 'vi_vn.tcvn5712': 'Vietnamese (Vietnam)', 'vi_vn.viscii': 'Vietnamese (Vietnam)', 'vi_vn.viscii111': 'Vietnamese (Vietnam)', 'wo_sn': 'Wolof (Senegal)'}
Try it online!