I'd like to save the pictures from this website into a folder:
http://www.photobirdireland.com/garden-birds.html
I've tried using import os
from l
Try the following code to download the images. Use urlretrieve
to download each image's src value to a local file.
import os
from urllib.parse import urljoin
from urllib.request import urlretrieve

import requests
from bs4 import BeautifulSoup

# Page whose <img> tags are downloaded into the current working directory.
url = 'http://www.photobirdireland.com/garden-birds.html'

# Fail loudly on a hung connection or an HTTP error instead of parsing an
# error page as if it were the gallery.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, "html.parser")

# Resolve each src against the page URL so relative and absolute links both
# work; backslashes are normalised to '/' and tags without a src are skipped.
images = [
    urljoin(url, image['src'].replace('\\', '/'))
    for image in soup.find_all('img')
    if image.get('src')
]

for img in images:
    # Save each file under its own name (last path component of the URL).
    urlretrieve(img, os.path.basename(img))
This line: html = urlopen(url)
should be: html = urlopen(self.url)
Edit: you can fetch the URLs like this:
def scrape_images(self):
    """Return the ``src`` value of every ``<img>`` tag on ``self.url``.

    The page is fetched with ``urlopen`` and parsed with the ``bs`` parser
    alias using ``'html.parser'``. Nothing is downloaded here; the caller
    gets the raw attribute values as they appear in the markup.

    Returns:
        A list with one entry per ``<img>`` tag, in document order. An
        entry is ``None`` when the tag has no ``src`` attribute.
    """
    # `self.url`, not `selfurl`: the page address lives on the instance.
    html = urlopen(self.url)
    soup = bs(html, 'html.parser')
    return [img.attrs.get("src") for img in soup.find_all('img')]
The next step would be to find out how to download them.
Your code is incomplete. First, run a loop over images = bs4.find_all('img', {})
Example:
for image in images:
    # get the img url; a tag without a src attribute has nothing to download
    src = image.get('src')
    if not src:
        continue
    # normalise Windows-style separators so the value is usable as a URL path
    img_url = src.replace('\\', '/')
    real_url = "http://www.photobirdireland.com/" + img_url
    # get the image name (last path component of the src)
    img_name = img_url.split('/')[-1]
    # now download the image using - import urllib.request & import os
    print("downloading {}".format(img_url))
    urllib.request.urlretrieve(real_url, os.path.join(path, img_name))
The complete code should look like the following:
import os
import urllib.request
import requests
from urllib.request import urlopen
from bs4 import BeautifulSoup as Bs
class ImageScraper:
    """Download every image referenced by an ``<img>`` tag on a web page.

    Attributes:
        url: Address of the page to scan for images.
        download_path: Directory that receives the downloaded files.
        session: A ``requests.Session`` created at construction time.
            ``scrape_images`` does not use it; it is kept so existing
            callers that read the attribute continue to work.
    """

    def __init__(self, url, download_path):
        """Store the page address and the target directory."""
        self.url = url
        self.download_path = download_path
        self.session = requests.Session()

    def scrape_images(self):
        """Fetch ``self.url`` and save each of its images to ``download_path``.

        The download directory is created if it does not exist. Each ``src``
        is resolved against the page URL, so relative and absolute image
        links both work. Tags without a usable ``src`` or file name are
        skipped. Progress is printed for every image.

        Raises:
            urllib.error.URLError: If the page or an image cannot be fetched.
            OSError: If the directory or a file cannot be written.
        """
        # Local import keeps this block self-contained; the file's import
        # section does not bring urljoin into scope.
        from urllib.parse import urljoin

        path = self.download_path
        os.makedirs(path, exist_ok=True)

        # Close the HTTP response as soon as the markup has been parsed.
        with urlopen(self.url) as html:
            soup = Bs(html, 'html.parser')

        for image in soup.find_all('img'):
            src = image.get('src')
            if not src:
                # <img> without a src attribute: nothing to download.
                continue

            # get the img url (normalise Windows-style separators)
            img_url = src.replace('\\', '/')
            real_url = urljoin(self.url, img_url)
            print(real_url)

            # get the image name (last path component of the src)
            img_name = img_url.split('/')[-1]
            if not img_name:
                # A src ending in '/' has no file name to save under.
                continue
            print(img_name)

            print("downloading {}".format(img_url))
            urllib.request.urlretrieve(real_url, os.path.join(path, img_name))
# Scan the garden-birds gallery page and save its images to a local folder.
scraper = ImageScraper(
    download_path=r"D:\Temp\Images",
    url="http://www.photobirdireland.com/garden-birds.html",
)
scraper.scrape_images()