test

蓝咒 提交于 2019-12-01 08:52:46

test

# -*- coding: utf-8 -*-
# @Time    :2019/10/14  20:45 
# @Author  :李成广(63)
# @Email   :chengguang.li@dili.com
# @File    :Spider.py
# @Brief   :爬虫主程序


import requests
from bs4 import BeautifulSoup


# Scrape the first listing page of doutula.com and print one
# {"title", "url"} record per image found in the main content container.

spider_url = 'https://www.doutula.com/photo/list/?page=1'
# The request is sent with a desktop-browser User-Agent header.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
}
# Without a timeout, requests waits indefinitely on a stalled connection.
page1 = requests.get(spider_url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
page1.raise_for_status()

soup = BeautifulSoup(page1.text, "html.parser")
print(soup)

# Container that holds the image listing; find() returns None when the
# page layout differs from what is expected.
div = soup.find(name='div', attrs={'class': 'page-content text-center'})
print(div)

# Guard each lookup so a missing container yields an empty result rather
# than an AttributeError on None.
div2 = div.find(name='div') if div is not None else None
a_list = div2.find_all(name='a') if div2 is not None else []

for a in a_list:
    img = a.find(name='img')
    if img is None:
        # Anchor without an <img> child (e.g. a plain text link): skip it.
        continue
    data_original = img.attrs.get('data-original')
    title = img.attrs.get('alt')
    if not data_original:
        # No 'data-original' attribute means there is no URL to report.
        continue
    data = {"title": title, "url": data_original}
    print(data)

  

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!