How to save pictures from a website to a local folder

前端未结

关注

 3  539

I need to save the pictures from this website to a local folder:

http://www.photobirdireland.com/garden-birds.html

I've tried using import os

from l


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  无人及你        
                
              
                            
                2021-01-21 12:52
              
            
            
                                                                       
Try the following code to download the images. It uses urlretrieve to download each image's src URL to a local file.

import os
from urllib.parse import urljoin, urlparse
from urllib.request import urlretrieve

import requests
from bs4 import BeautifulSoup

# Page whose <img> tags are downloaded into the current working directory.
url = 'http://www.photobirdireland.com/garden-birds.html'
data = requests.get(url).text
soup = BeautifulSoup(data, "html.parser")

# Resolve each src against the page URL instead of gluing a fixed prefix on:
# urljoin copes with relative, root-relative and already-absolute sources.
# Backslashes are not valid URL separators, so they are normalised first.
images = [
    urljoin(url, image['src'].replace('\\', '/'))
    for image in soup.find_all('img')
    if image.get('src')  # skip <img> tags that carry no src attribute
]

for img in images:
    # Name the local file after the URL path only, so a query string or
    # fragment never ends up in the file name.
    filename = os.path.basename(urlparse(img).path)
    if filename:
        urlretrieve(img, filename)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  我在风中等你        
                
              
                            
                2021-01-21 13:08
              
            
            
                                                                       
The line html = urlopen(url) should be html = urlopen(self.url).

edit: you can fetch the urls like this

def scrape_images(self):
    """Return the ``src`` value of every ``<img>`` tag on the page at ``self.url``.

    ``<img>`` tags that have no ``src`` attribute are skipped, so the
    returned list contains only strings.
    """
    # Was ``urlopen(selfurl)``; ``selfurl`` is not defined anywhere in this
    # snippet, so the call raised NameError.
    html = urlopen(self.url)
    # Named ``soup`` rather than ``bs4`` so the local does not shadow the package name.
    soup = bs(html, 'html.parser')

    return [
        img.attrs["src"]
        for img in soup.find_all('img')
        if img.attrs.get("src") is not None
    ]


and the next step would be finding out how to download them.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  故里飘歌        
                
              
                            
                2021-01-21 13:11
              
            
            
                                                                       
Your code is incomplete. First, loop over the images returned by images = bs4.find_all('img', {})

Example

for tag in images:
    # normalise backslashes in the src, then prefix the site root to get an absolute URL
    src = tag.get('src').replace('\\', '/')
    full_url = "http://www.photobirdireland.com/" + src

    # the file name is the last '/'-separated segment of the src
    file_name = str(src.rsplit('/', 1)[-1])

    # fetch the image into `path`; requires `import urllib.request` and `import os`
    print("downloading {}".format(src))
    target = os.path.join(path, file_name)
    urllib.request.urlretrieve(full_url, target)




The complete code should look like the following:

import os
import urllib.request
from urllib.parse import urljoin
from urllib.request import urlopen

import requests
from bs4 import BeautifulSoup as Bs


class ImageScraper:
    """Download every image referenced by an ``<img>`` tag on one web page.

    ``url`` is the page to scan and ``download_path`` is the directory that
    receives the downloaded files.
    """

    def __init__(self, url, download_path):
        self.url = url
        self.download_path = download_path
        # Kept so existing callers can still read the attribute;
        # scrape_images() itself does not use it.
        self.session = requests.Session()

    def scrape_images(self):
        """Fetch ``self.url`` and save each of its images into ``self.download_path``.

        Image sources are resolved against ``self.url``, so relative,
        root-relative and absolute ``src`` values all work. ``<img>`` tags
        without a usable ``src`` are skipped. The download directory is
        created if it does not exist yet. Progress is printed per image.
        """
        path = self.download_path
        # urlretrieve does not create missing directories.
        os.makedirs(path, exist_ok=True)

        # Parse inside the ``with`` block so the HTTP response is closed
        # as soon as the markup has been read.
        with urlopen(self.url) as html:
            soup = Bs(html, 'html.parser')

        for image in soup.find_all('img', {}):
            src = image.get('src')
            if not src:
                # No src attribute: nothing to download for this tag.
                continue

            # get the img url (backslashes are not valid URL separators)
            img_url = src.replace('\\', '/')
            real_url = urljoin(self.url, img_url)
            print(real_url)

            # get the image name
            img_name = img_url.split('/')[-1]
            if not img_name:
                # src ends with '/', so there is no file name to save under.
                continue
            print(img_name)

            print("downloading {}".format(img_url))
            urllib.request.urlretrieve(real_url, os.path.join(path, img_name))


# Scrape the garden-birds page and store its images under D:\Temp\Images.
scraper = ImageScraper(
    url="http://www.photobirdireland.com/garden-birds.html",
    download_path=r"D:\Temp\Images",
)
scraper.scrape_images()

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复