How to save pictures from a website to a local folder

前端 未结 3 537
爱一瞬间的悲伤
爱一瞬间的悲伤 2021-01-21 12:28

I need to save the pictures from this website to a local folder:

http://www.photobirdireland.com/garden-birds.html

I've tried using import os

from l         


        
相关标签:
3条回答
  • 2021-01-21 12:52

    Try the following code to download the images. It uses urlretrieve to download each image's src URL to a local file.

    import os
    from urllib.parse import urljoin
    from urllib.request import urlretrieve
    
    import requests
    from bs4 import BeautifulSoup
    
    # Page whose <img> elements are saved into the current working directory.
    url = 'http://www.photobirdireland.com/garden-birds.html'
    data = requests.get(url).text
    soup = BeautifulSoup(data, "html.parser")
    
    # Resolve every src against the page URL (instead of gluing it onto a
    # hard-coded host) and skip <img> tags that carry no src attribute, which
    # would otherwise raise KeyError. Backslashes are normalised to forward
    # slashes so the result is a well-formed URL.
    images = [
        urljoin(url, image['src'].replace('\\', '/'))
        for image in soup.find_all('img')
        if image.get('src')
    ]
    
    for img in images:
        # The local file name is the last path component of the image URL.
        urlretrieve(img, os.path.basename(img))
    
    0 讨论(0)
  • 2021-01-21 13:08

    The line html = urlopen(url) should be html = urlopen(self.url).

    edit: you can fetch the urls like this

    def scrape_images(self):
        """Return the ``src`` value of every ``<img>`` tag found on ``self.url``.

        Tags that have no ``src`` attribute are left out, so the returned list
        contains only strings.
        """
        # The page address lives on the instance; the bare name ``selfurl``
        # used previously does not exist and raised NameError.
        html = urlopen(self.url)
        soup = bs(html, 'html.parser')

        return [
            img.attrs["src"]
            for img in soup.find_all('img')
            if img.attrs.get("src")
        ]
    

    and the next step would be finding out how to download them.

    0 讨论(0)
  • 2021-01-21 13:11

    Your code is incomplete. First, loop over images = bs4.find_all('img', {})

    Example

    for image in images:
        # get the img url; an <img> without a src attribute has nothing to
        # download and would make .replace() fail on None, so skip it
        src = image.get('src')
        if not src:
            continue
        # normalise any backslashes in the src to forward slashes
        img_url = src.replace('\\', '/')
        real_url = "http://www.photobirdireland.com/" + img_url
    
        # get the image name (last component of the src path)
        img_name = img_url.split('/')[-1]
    
        # now download the image using - import urllib.request & import os
        print("downloading {}".format(img_url))
        urllib.request.urlretrieve(real_url, os.path.join(path, img_name))
    

    The complete code should look like the following:

    import os
    import urllib.request
    from urllib.parse import urljoin
    from urllib.request import urlopen
    
    import requests
    from bs4 import BeautifulSoup as Bs
    
    
    class ImageScraper:
        """Download every image referenced by a web page into a local folder."""

        def __init__(self, url, download_path):
            """Store the page to scrape and the folder that receives the images.

            Args:
                url: Address of the HTML page whose ``<img>`` tags are scraped.
                download_path: Directory the image files are written to.
            """
            self.url = url
            self.download_path = download_path
            # Not used by scrape_images(), which fetches through urllib; kept so
            # the attribute remains available to existing callers.
            self.session = requests.Session()

        def scrape_images(self):
            """Fetch ``self.url`` and save each ``<img>`` source to ``download_path``.

            Every ``src`` is resolved relative to ``self.url``. Tags without a
            ``src``, or whose ``src`` ends in ``/`` (no file name), are skipped.

            Raises:
                urllib.error.URLError: If the page or an image cannot be fetched.
                OSError: If the download folder or a file cannot be written.
            """
            path = self.download_path
            # urlretrieve() does not create missing folders. An empty path means
            # "current directory", which needs no creation.
            if path:
                os.makedirs(path, exist_ok=True)

            # Close the HTTP response as soon as the markup has been parsed.
            with urlopen(self.url) as html:
                soup = Bs(html, 'html.parser')
            images = soup.find_all('img', {})

            for image in images:
                # get the img url; skip tags with nothing to download
                src = image.get('src')
                if not src:
                    continue
                img_url = src.replace('\\', '/')

                # get the image name; skip a src that has no file component,
                # since the target would then be the folder itself
                img_name = img_url.split('/')[-1]
                if not img_name:
                    continue

                # Resolve against the scraped page rather than a hard-coded
                # host, so the class works for any url passed to __init__.
                real_url = urljoin(self.url, img_url)
                print(real_url)
                print(img_name)
                print("downloading {}".format(img_url))
                urllib.request.urlretrieve(real_url, os.path.join(path, img_name))
    
    
    # Scrape the garden-birds page and store its images in a local folder.
    scraper = ImageScraper(
        url="http://www.photobirdireland.com/garden-birds.html",
        download_path=r"D:\Temp\Images",
    )
    scraper.scrape_images()
    
    0 讨论(0)
提交回复
热议问题