Python efficiently split currency sign and number in one string

前端未结

关注

 6  881

I have a string like '$200,000,000' or 'Yan300,000,000'.

I want to split the currency and number, and output a tuple ('$', '200000000'), with the commas removed from the number.


                      
              相关标签:


      
      
        
          6条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  无人共我        
                
              
                            
                2021-01-16 07:07
              
            
            
                                                                       
You can use regex for this. 

import re

# Split a string such as '$200,000,000' into its currency prefix and its
# integer value. `cur_str` must already hold the input string.
p1 = re.compile(r"\d")  # match digits
p2 = re.compile(r"\D")  # match non-digits

# Everything before the first digit is the currency symbol; one split is
# enough because only the first piece is used.
currency_symbol = p1.split(cur_str, maxsplit=1)[0]
# Splitting on every non-digit drops the symbol and the thousands separators;
# joining the remaining digit runs gives the bare number.
value = int("".join(p2.split(cur_str)))

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  温柔的废话        
                
              
                            
                2021-01-16 07:16
              
            
            
                                                                       
import locale
import re
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

def split_currency(text):
    """Split a price string into its currency prefix and integer amount.

    The leading run of non-digit characters is taken as the currency. The
    remainder is parsed with ``locale.atoi``, so grouping separators are
    interpreted according to the currently active locale.

    Args:
        text: String such as ``'$200,000,000'`` or ``'Yan300,000,000'``.

    Returns:
        A ``(currency, number)`` tuple, e.g. ``('$', 200000000)`` when an
        en_US locale is active.

    Raises:
        ValueError: If ``text`` does not start with a non-digit character, or
            if the remainder cannot be parsed as an integer.
    """
    # The capturing group makes re.split keep the prefix; the first element is
    # the empty text before the match. When nothing matches, the result has a
    # single element and the unpacking raises ValueError.
    _, currency, num = re.split(r'^(\D+)', text, maxsplit=1)
    return currency, locale.atoi(num)
print(split_currency('$200,000,000'))
# ('$', 200000000)
print(split_currency('Yan300,000,000'))
# ('Yan', 300000000)


split_currency will raise a ValueError if text does not start with a currency symbol (or anything that is not a digit). You could use try...except to handle that case differently if you wish.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  余生分开走        
                
              
                            
                2021-01-16 07:17
              
            
            
                                                                       
>>> import re
>>> string = 'YAN300,000,000'
>>> match = re.search(r'([\D]+)([\d,]+)', string)
>>> output = (match.group(1), match.group(2).replace(',',''))
>>> output
('YAN', '300000000')


(Thanks to zhangyangyu for pointing out I hadn't fully answered the question)
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  梦谈多话        
                
              
                            
                2021-01-16 07:17
              
            
            
                                                                       
>>> filter(str.isdigit, s)
'200000000'
>>> filter(lambda x: not x.isdigit() and x != ',', s)
'$'
>>> 
>>> (filter(lambda x: not x.isdigit() and x != ',' ,s), filter(str.isdigit, s))
('$', '200000000')
>>> 

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  遥遥无期        
                
              
                            
                2021-01-16 07:17
              
            
            
                                                                       
>>> import itertools
>>> myStr = '$200,000,000'
>>> ''.join(itertools.dropwhile(lambda c: not c.isdigit(), myStr))
'200,000,000'
>>> myStr = 'Yan300,000,000'
>>> ''.join(itertools.dropwhile(lambda c: not c.isdigit(), myStr))
'300,000,000'


Similarly, you could use itertools.takewhile with the same lambda function to get the currency sign. However, this might be simpler:

# Index of the first digit in myStr. Falls back to len(myStr) when there is
# no digit, so that sign becomes the whole string and val is empty.
idx = next((i for i, c in enumerate(myStr) if c.isdigit()), len(myStr))
sign, val = myStr[:idx], myStr[idx:]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  灰色年华        
                
              
                            
                2021-01-16 07:19
              
            
            
                                                                       
It won't be faster, I bet, but I think it's more readable:

>>> cur_string = "asd1,23456,123,1233"
>>> cur_sym = re.search(r"([^0-9, ]*)[0-9]", cur_string).groups()[0]
>>> cur = re.sub("[^0-9]","",cur_string)
>>> print cur_sym,int(cur)
asd 1234561231233

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复