"""Scrape the gallery index at pic.yesky.com and download every album's
images into per-album folders under <script dir>/img1/."""

import os

import requests
from bs4 import BeautifulSoup

# Directory containing this script; albums are stored under ./img1/.
base_path = os.path.dirname(os.path.abspath(__file__))
img1_path = os.path.join(base_path, "img1")

# Fetch the gallery index page.
response = requests.get("http://pic.yesky.com/c/6_20491_1.shtml")
soup = BeautifulSoup(response.text, "html.parser")

# The album thumbnails live inside <div class="lb_box">;
# each <dd> element wraps one album entry.
div_obj = soup.find(name="div", attrs={"class": "lb_box"})
list_dd = div_obj.find_all(name="dd")

for dd in list_dd:
    # The <a> carries the album title (link text) and the album page URL (href).
    a_obj = dd.find("a")
    dir_path = os.path.join(img1_path, a_obj.text)
    # makedirs(exist_ok=True) also creates the img1 parent on first run;
    # the original isdir()/mkdir() pair crashed when img1 did not exist yet.
    os.makedirs(dir_path, exist_ok=True)

    # Fetch the album page; the site serves GBK-encoded HTML.
    a_response = requests.get(a_obj.get("href"))
    a_response.encoding = "gbk"
    soup2 = BeautifulSoup(a_response.text, "html.parser")

    # All of the album's <img> tags sit inside <div class="overview">.
    div_obj2 = soup2.find(name="div", attrs={"class": "overview"})
    print(div_obj2)
    if div_obj2 is None:
        # Layout changed or the page failed to load — skip this album
        # instead of crashing with AttributeError on find_all().
        continue

    img_list = div_obj2.find_all(name="img")
    for img in img_list:
        img_src = img.get("src")
        img_response = requests.get(img_src)
        # Name the local file after the last path segment of the image URL.
        file_path = os.path.join(dir_path, img_src.rsplit("/", 1)[-1])
        with open(file_path, "wb") as f:
            f.write(img_response.content)
Source: https://www.cnblogs.com/zhang-da/p/12208018.html