爬天极网美女图片缩略图:

∥☆過路亽.° 提交于 2020-01-18 01:32:19
# Scrape album thumbnail images from a pic.yesky.com listing page and save
# them under ./img1/<album title>/<original file name>.
import os

import requests
from bs4 import BeautifulSoup

# Directory containing this script; all downloads go under ./img1/.
base_path = os.path.dirname(os.path.abspath(__file__))
img1_path = os.path.join(base_path, "img1")

# Fetch the listing page (timeout so a hung connection cannot block forever).
response = requests.get("http://pic.yesky.com/c/6_20491_1.shtml", timeout=10)
soup = BeautifulSoup(response.text, "html.parser")

# Album thumbnails live inside <div class="lb_box">; each <dd> wraps one album.
div_obj = soup.find(name="div", attrs={"class": "lb_box"})
list_dd = div_obj.find_all(name="dd") if div_obj is not None else []

for dd in list_dd:
    # The <a> inside each <dd> carries the album title (text) and detail URL (href).
    a_obj = dd.find("a")
    if a_obj is None:
        continue

    # One sub-directory per album, named after the link text.
    # makedirs also creates the intermediate "img1" directory; a plain
    # os.mkdir would raise FileNotFoundError when "img1" does not exist yet.
    dir_path = os.path.join(img1_path, a_obj.text)
    os.makedirs(dir_path, exist_ok=True)

    # Fetch the album detail page; the site serves GBK-encoded HTML.
    a_response = requests.get(a_obj.get("href"), timeout=10)
    a_response.encoding = "gbk"
    soup2 = BeautifulSoup(a_response.text, "html.parser")

    # Thumbnails on the detail page sit inside <div class="overview">.
    div_obj2 = soup2.find(name="div", attrs={"class": "overview"})
    if div_obj2 is None:
        continue

    for img in div_obj2.find_all(name="img"):
        img_src = img.get("src")
        if not img_src:
            continue
        # Download each image and keep its original file name
        # (last path component of the src URL).
        img_response = requests.get(img_src, timeout=10)
        file_path = os.path.join(dir_path, img_src.rsplit("/", 1)[-1])
        with open(file_path, "wb") as f:
            f.write(img_response.content)

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!