# -*- coding: utf-8 -*-
# @Time   : 2019/10/14 20:45
# Author  : Li Chengguang (63)
# @Email  : chengguang.li@dili.com
# @File   : Spider.py
# @Brief  : Main crawler program
"""Fetch one listing page from doutula.com and print its image entries.

The script downloads ``spider_url``, parses the HTML with BeautifulSoup's
``html.parser`` backend, locates the ``page-content text-center`` container,
and prints a ``{"title": ..., "url": ...}`` dict for every ``<img>`` inside
an ``<a>`` that carries a non-empty ``data-original`` attribute.
"""
import requests
from bs4 import BeautifulSoup

spider_url = 'https://www.doutula.com/photo/list/?page=1'
# The request is sent with a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'
}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from parsing an HTTP error page as a listing.
page1 = requests.get(spider_url, headers=headers, timeout=10)
page1.raise_for_status()

soup = BeautifulSoup(page1.text, "html.parser")
print(soup)

# Outer container of the listing; find() returns None when it is absent.
div = soup.find(name='div', attrs={'class': 'page-content text-center'})
print(div)

# The anchors are read from the first <div> nested in the container.
div2 = div.find(name='div') if div is not None else None
if div2 is None:
    # Fail with an explicit message instead of an AttributeError on None.
    raise RuntimeError(
        "Expected listing container not found in %s" % spider_url
    )
a_list = div2.find_all(name='a')

for a in a_list:
    img = a.find(name='img')
    if img is None:
        # Anchor without an image: nothing to report.
        continue
    data_original = img.attrs.get('data-original')
    title = img.attrs.get('alt')
    if not data_original:
        # Skip images that have no (or an empty) data-original attribute.
        continue
    data = {"title": title, "url": data_original}
    print(data)