Scraping google news with BeautifulSoup returns empty results

前端未结

关注

 1  1572

I am trying to scrape google news using the following code:

from bs4 import BeautifulSoup
import requests
import time
from random import randint


def scrape_new


                      
              相关标签:


      
      
        
          1条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  执念已碎        
                
              
                            
                2021-02-11 11:31
              
            
            
                                                                       
I tried running the code and it works fine on my computer.

You could try printing the status code for the request, and see if it's anything other than 200.

from bs4 import BeautifulSoup
import requests
import time
from random import randint


def scrape_news_summaries(s):
    time.sleep(randint(0, 2))  # relax and don't let google be angry
    r = requests.get("http://www.google.co.uk/search?q="+s+"&tbm=nws")
    print(r.status_code)  # Print the status code
    content = r.text
    news_summaries = []
    soup = BeautifulSoup(content, "html.parser")
    st_divs = soup.findAll("div", {"class": "st"})
    for st_div in st_divs:
        news_summaries.append(st_div.text)
    return news_summaries


l = scrape_news_summaries("T-Notes")
#l = scrape_news_summaries("""T-Notes""")
for n in l:
    print(n)


https://www.scrapehero.com/how-to-prevent-getting-blacklisted-while-scraping/ for a list of status code that's a sign you have been banned.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复