Scraping second-hand car listings from 58同城. What a Python beginner lacks most is a sense of achievement, so let's start with a simple crawler... the code is a bit redundant.
#! python3
import requests, time, openpyxl
from bs4 import BeautifulSoup

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3626.400 QQBrowser/10.4.3211.400'
}
def get_car_links(url):  # collect the detail-page URL of every car on a listing page
    car_links = []
    res = requests.get(url, headers=header)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    links = soup.select('h5 > a')
    for link in links:
        car_links.append(link.get('href'))
    return car_links
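
# A quick sanity check (hypothetical, not in the original post) that can be run
# separately before the full crawl, to confirm the 'h5 > a' selector still
# matches 58.com's current markup:
#   links = get_car_links('https://huizhou.58.com/ershouche/pn1/?pane=true')
#   print(links[:3])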
def get_car_info():  # fetch each car's details and write them into an Excel workbook
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = '新车'  # "new cars" sheet
    wb.create_sheet('二手车', 0)  # "used cars" sheet, inserted first
    wb.save('E:\\汽车信息汇总.xlsx')
    wb = openpyxl.load_workbook('E:\\汽车信息汇总.xlsx')
    ws_old = wb['二手车']
    ws_new = wb['新车']
    ws_old['A1'] = '车型信息'
    ws_old['B1'] = '价格:万'
    ws_old['C1'] = '行驶公里数'
    ws_old['D1'] = '看车地址'
    row = 2
    num = 0
    for n in range(1, 100):  # crawl second-hand listings from pages 1-99
        print(('Downloading page %s.' % n).center(50, '*'))
        url = 'https://huizhou.58.com/ershouche/pn' + str(n) + '/?pane=true'
        car_urls = get_car_links(url)  # get this page's detail URLs via get_car_links()
        for car_url in car_urls:
            res = requests.get(car_url, headers=header)
            res.raise_for_status()
            soup = BeautifulSoup(res.text, 'html.parser')
            time.sleep(2)  # throttle the crawl so the server is not hammered
            titles = soup.select('div.content_title p.title_p')
            prices = soup.select('span.price_span span.jiage')
            infos = soup.select('div.lcsp_info ul.clearfix li span')
            addresses = soup.select('div.adress span.addre')
            for title, price, info, address in zip(titles, prices, infos, addresses):
                ws_old['A%s' % row] = title.text
                ws_old['B%s' % row] = price.text
                ws_old['C%s' % row] = info.text
                ws_old['D%s' % row] = address.text
                row += 1
            num += 1
            print(f'Downloading information for car number {num}.')
    wb.save('E:\\汽车信息汇总_copy.xlsx')  # save the collected data as a copy

get_car_info()
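
One caveat with this script: a single failed request (a timeout, or 58.com serving a CAPTCHA page) makes raise_for_status() abort the whole crawl before anything is saved. Here is a minimal sketch of a more forgiving fetch, assuming the same header dict as above; fetch_car_page is a hypothetical helper, not part of the original code:

def fetch_car_page(car_url):
    # Return the parsed detail page, or None if the request fails,
    # so the caller can skip one bad car instead of crashing the crawl.
    try:
        res = requests.get(car_url, headers=header, timeout=10)
        res.raise_for_status()
        return BeautifulSoup(res.text, 'html.parser')
    except requests.RequestException as e:
        print(f'Skipping {car_url}: {e}')
        return None

Inside the inner loop, soup = fetch_car_page(car_url) followed by a None check would then replace the bare requests.get / raise_for_status pair.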
Source: CSDN
Author: 亚李士多德
Link: https://blog.csdn.net/yali_shiduode/article/details/89304950