python - find the occurrence of the word in a file

前端未结

关注

 6  399

I am trying to count how many times each word occurs in a file. I have a text file (TEST.txt) whose content is as follows:

ashwin prog


                      
              相关标签:


      
      
        
          6条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  滥情空心        
                
              
                            
                2020-12-03 22:14
              
            
            
                                                                       
from collections import Counter;
# Tally how many times each whitespace-separated word appears in TEST.txt.
cnt = Counter()

# The with-block closes the file as soon as reading finishes.
with open('TEST.txt', 'r') as fh:
    for line in fh:
        # Counter.update adds one to the count of every word it is given.
        cnt.update(line.split())

# With a single argument the parenthesised form prints the same text on
# Python 2 and Python 3.
print(cnt)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  清歌不尽        
                
              
                            
                2020-12-03 22:19
              
            
            
                                                                       
Using a defaultdict:

from collections import defaultdict 

def read_file(fname):
    """Count how often each word occurs in the text file *fname*.

    Words are the whitespace-separated tokens of each line.

    Args:
        fname: Path of the file to read.

    Returns:
        A ``defaultdict(int)`` mapping each word to its number of
        occurrences.
    """
    words_dict = defaultdict(int)

    # The with-block guarantees the handle is closed, even if reading fails.
    with open(fname, 'r') as fp:
        for line in fp:
            # split() with no argument splits on any run of whitespace, so
            # the trailing newline and repeated blanks never become part of
            # (or stand in for) a word.
            for word in line.split():
                words_dict[word] += 1

    return words_dict

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  梦如初夏        
                
              
                            
                2020-12-03 22:19
              
            
            
                                                                       
FILE_NAME = 'file.txt'

# Maps each normalised word to the number of times it has been seen.
wordCounter = {}

with open(FILE_NAME,'r') as source:
    for raw_line in source:
        # Remove commas, apostrophes and full stops from the line.
        cleaned = raw_line
        for mark in (',', '\'', '.'):
            cleaned = cleaned.replace(mark, '')
        # Lower-case the line, then split it into words on whitespace.
        for token in cleaned.lower().split():
            # A word not seen before starts from 0, so one expression
            # handles both the first and every later sighting.
            wordCounter[token] = wordCounter.get(token, 0) + 1

# Header row followed by a rule.
print('{:15}{:3}'.format('Word','Count'))
print('-' * 18)

# One row per distinct word with its occurrence count.
row_format = '{:15}{:3}'
for token, occurrences in wordCounter.items():
    print(row_format.format(token, occurrences))

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  自闭症患者        
                
              
                            
                2020-12-03 22:23
              
            
            
                                                                       
Use the update method of Counter.  Example:

from collections import Counter

# Sample input: two lines of space-separated words.
data = '''\
ashwin programmer india
amith programmer india'''

# Feed every word of every line to the counter in a single update call.
c = Counter()
c.update(word for row in data.splitlines() for word in row.split())
print(c)


Output:

Counter({'india': 2, 'programmer': 2, 'amith': 1, 'ashwin': 1})

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  天涯浪人        
                
              
                            
                2020-12-03 22:24
              
            
            
                                                                       
# Count case-insensitive word occurrences in input.txt.
# The with-block closes the file once its contents have been read.
with open('input.txt', 'r') as f:
    data = f.read().lower()
list1 = data.split()

# Start every distinct word at zero.
d = dict.fromkeys(set(list1), 0)

# Add one per occurrence. Each word is looked up directly by key, so this
# pass is linear in the number of words.
for i in list1:
    d[i] += 1
print(d)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  被撕碎了的回忆        
                
              
                            
                2020-12-03 22:40
              
            
            
                                                                       
You're iterating over every line and calling Counter each time. You want Counter to run over the entire file. Try:

from collections import Counter

with open("TEST.txt", "r") as f:
    # Read the whole file while it is open and split it into
    # whitespace-separated words.
    contents = f.read().split()
# Counter tallies the complete word list at once. With a single argument,
# print(...) produces the same output on Python 2 and Python 3.
print(Counter(contents))

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复