import requests,time,csv
from bs4 import BeautifulSoup
# Scrape hotel/guesthouse transfer listings from 58.com (Hangzhou) and save
# the project name, contact, phone, summary and address of each listing to CSV.

LISTING_URL = 'https://hz.58.com/lvguanbinguan'

# NOTE(review): the user-agent value was missing from the original file (the
# bare `'user-agent':` made the script a syntax error). This generic desktop
# browser string is a placeholder -- replace it with the value you intend.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    ),
}

LISTING_PARAMS = {
    'PGTID': '0d306b32-0004-f711-87c6-3e3cb7371c0d',
    'ClickID': '2',
}

OUTPUT_PATH = '58同城酒店转让项目信息.csv'
CSV_HEADER = ['项目名', '联系人', '联系电话', '信息', '地址']

REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever
REQUEST_DELAY = 1     # seconds to pause between detail-page requests


def absolute_url(href, base=LISTING_URL):
    """Resolve *href* against *base* so it can be passed to ``requests.get``.

    Absolute URLs are returned unchanged; protocol-relative (``//host/...``)
    and relative links are completed using the scheme/host of *base*.
    """
    from urllib.parse import urljoin

    return urljoin(base, href)


def _node_text(node):
    """Return ``node.text``, or an empty string when the element was not found."""
    return '' if node is None else node.text


def _fetch_soup(url, parser, params=None):
    """Download *url* and parse the response body with the given *parser*.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    res = requests.get(
        url, headers=HEADERS, params=params, timeout=REQUEST_TIMEOUT
    )
    res.raise_for_status()
    return BeautifulSoup(res.text, parser)


def collect_detail_links(soup):
    """Return the detail-page URLs found under ``<h2 class="title">`` headings.

    Headings without an ``<a href=...>`` are skipped instead of raising.
    """
    links = []
    for heading in soup.find_all('h2', class_="title"):
        anchor = heading.find('a')
        href = anchor.get('href') if anchor is not None else None
        if href:
            links.append(absolute_url(href))
    return links


def parse_hotel(soup):
    """Extract one CSV row from a detail page.

    Returns a list in ``CSV_HEADER`` order. A field whose element is absent
    from the page is recorded as an empty string.
    """
    return [
        _node_text(soup.find('h1', class_="c_000 f20")),
        _node_text(soup.find(class_="poster-name")),
        _node_text(soup.find('p', class_="phone-num")),
        _node_text(soup.find('p', class_="house_basic_title_info")),
        _node_text(soup.find('p', class_="p_2")),
    ]


def write_hotels_csv(path, rows):
    """Write ``CSV_HEADER`` followed by *rows* to *path* as UTF-8 CSV.

    The file is managed by a ``with`` block so it is closed even if writing
    fails part-way through.
    """
    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(CSV_HEADER)
        writer.writerows(rows)


def main():
    """Scrape the listing page, then every detail page, and write the CSV."""
    listing = _fetch_soup(LISTING_URL, 'html.parser', params=LISTING_PARAMS)
    href_list = collect_detail_links(listing)
    for href in href_list:
        print(href)

    hotel_list = []
    for href in href_list:
        try:
            detail = _fetch_soup(href, 'lxml')
        except requests.RequestException as exc:
            # One unreachable page should not discard everything scraped so far.
            print('skipping', href, exc)
        else:
            row = parse_hotel(detail)
            print(row[0], row[2])  # project name and phone, as progress output
            hotel_list.append(row)
        time.sleep(REQUEST_DELAY)  # throttle requests to the site

    write_hotels_csv(OUTPUT_PATH, hotel_list)


if __name__ == '__main__':
    main()
# Source: 51CTO
# Author: wx5d72071a58c07
# Link: https://blog.51cto.com/14534896/2439531