初尝爬取58同城车辆信息

落爺英雄遲暮 提交于 2019-12-08 22:02:46

爬取58同城二手车信息。小白学 Python 最缺的就是成就感,那就从简易爬虫开始吧……(代码略显冗余。)

#! python3

import requests, time, openpyxl
from bs4 import BeautifulSoup

# HTTP headers sent with every request below; the User-Agent string
# identifies the client as a desktop QQBrowser/Chrome on Windows.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3626.400 QQBrowser/10.4.3211.400',
}
     
def get_car_links(url, timeout=10):
    """Return the vehicle detail-page links found on one listing page.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        A list with the ``href`` value of every ``h5 > a`` anchor on the
        page, in document order. Anchors that have no ``href`` (or an
        empty one) are left out so callers never receive ``None``.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    res = requests.get(url, headers=header, timeout=timeout)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    # Tag.get() yields None when the attribute is missing; passing that on
    # would make the follow-up requests.get() call fail, so drop it here.
    hrefs = (anchor.get('href') for anchor in soup.select('h5 > a'))
    return [href for href in hrefs if href]

def get_car_info(last_page=100, delay=2, timeout=10):
    """Crawl used-car listings and store the details in an Excel workbook.

    Creates a workbook with a '二手车' (used cars) sheet and a '新车' (new
    cars) sheet, walks listing pages 1..``last_page`` of huizhou.58.com,
    opens every vehicle link returned by ``get_car_links`` and appends one
    row per vehicle (model, price, mileage, viewing address) to the
    used-car sheet.

    Args:
        last_page: Number of the last listing page to crawl (inclusive).
        delay: Seconds to pause after each detail-page download.
        timeout: Seconds to wait for each detail-page response.

    Raises:
        requests.HTTPError: If any page responds with a 4xx/5xx status.
        requests.Timeout: If a detail page does not answer in time.
    """
    wb = openpyxl.Workbook()
    wb.active.title = '新车'
    ws_old = wb.create_sheet('二手车', 0)
    # Still written so the same (empty) skeleton file appears on disk as
    # before; the workbook object stays usable, so no reload is needed.
    wb.save('E:\\汽车信息汇总.xlsx')

    # Header row; later append() calls continue on the rows below it.
    ws_old.append(['车型信息', '价格:万', '行驶公里数', '看车地址'])

    num = 0  # running count of vehicles written so far

    # range() excludes its stop value, hence the +1 to include last_page.
    for n in range(1, last_page + 1):
        print('Downloading the %s page.'.center(50, '*') % n)
        url = 'https://huizhou.58.com/ershouche/pn' + str(n) + '/?pane=true'
        for car_url in get_car_links(url):
            if not car_url:
                # An anchor without a usable href cannot be fetched.
                continue
            res = requests.get(car_url, headers=header, timeout=timeout)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, 'html.parser')
            time.sleep(delay)  # throttle requests to the site

            titles = soup.select('div.content_title p.title_p')
            prices = soup.select('span.price_span span.jiage')
            infos = soup.select('div.lcsp_info  ul.clearfix li span')
            adresses = soup.select('div.adress span.addre')

            written_before = num
            # zip() stops at the shortest list, so a page missing any of
            # the four fields contributes no row.
            for title, price, info, adress in zip(titles, prices, infos, adresses):
                ws_old.append([title.text, price.text, info.text, adress.text])
                num += 1
                print(f'Downloading infomation of the {num} car.')

            if num != written_before:
                # Checkpoint after each vehicle so a later failure does
                # not lose the rows collected so far.
                wb.save('E:\\汽车信息汇总_copy.xlsx')
              
# Start the crawl. There is no ``__main__`` guard, so this call also runs
# whenever the file is imported.
get_car_info()
    

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!