Scrapy-Redis 分布式爬虫
案例 1(增量式爬取):爬取京东全部图书;可自行扩展,爬取每一本电子书的评论。 1. spider: # -*- coding: utf-8 -*- import scrapy from copy import deepcopy import json import urllib class JdSpider(scrapy.Spider): name = ' jd ' allowed_domains = [ ' jd.com ' , ' p.3.cn ' ] start_urls = [ ' https://book.jd.com/booksort.html ' ] def parse(self, response): dt_list = response.xpath( " //div[@class='mc']/dl/dt " ) # 大分类列表 for dt in dt_list: item = {} item[ " b_cate " ] = dt.xpath( " ./a/text() " ).extract_first() em_list = dt.xpath( " ./following-sibling::dd[1]/em " ) # 小分类列表 for em in em_list: item[ " s_href " ] = em.xpath( " ./a/@href " )