爬取豆瓣电影Top250
import requests from lxml import etree # 获取豆瓣电影排行前250的电影信息 count = 0 for page in range(0,10): url = 'https://movie.douban.com/top250?start=%s&filter='%(page*25) response = requests.get(url) root = etree.HTML(response.content) big_cate_list = root.xpath("//ol[@class='grid_view']/li") # 标签的 href 属性用于指定超链接目标的 URL for big_cate in big_cate_list: count = count + 1 # big_cate_href = big_cate.xpath("div/div/a/@href")[0] big_cate_alt = big_cate.xpath("div/div/a/img/@alt")[0] big_cate_src = big_cate.xpath("div/div/a/img/@src")[0] big_cate_intro = big_cate.xpath("div/div[@class='info']/div[@class='bd']/p