问题
I'm looking to scrape this link, with just two simple pieces of information, but I don't know why I have this result and it can't give me all the data I search for:
particulier_allinfo particulier_tel 0 ABEL KEVIN10 RUE VIRGILE67200 Strasbourg
This is the code, thanks for your help :
import bs4 as bs
import urllib
import urllib.request
import requests
from bs4 import BeautifulSoup
import pandas
from pandas import DataFrame
import csv
with open('test_bs_118000.csv', mode='w') as csv_file:
fieldnames = ['AllInfo', 'Tel']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
writer.writeheader()
particulier_allinfo = []
particulier_tel = []
i=1
historyurl= "https://www.118000.fr/search?part=1&who=kevin&page=" + str(i)
historypage= urllib.request.urlopen(historyurl)
soup=bs.BeautifulSoup(historypage,'html.parser')
cat=1
for category in soup.findAll('a',{'class':'clickable atel'}):
print(cat)
print(category.text)
cat=cat+1
q=1
for freru in soup.findAll('div',{'class':'cardbanner'}):
print(q)
print(freru.text)
q=q+1
#creating the data frame and populating its data into the csv file
data = {'particulier_allinfo':[freru.text], 'particulier_tel':[category.text]}
df = DataFrame(data, columns = ['particulier_allinfo', 'particulier_tel'])
print(df)
I am also trying to do a pagination for this code since the url lasts with "page=1,page=2,...,page=n". If you can also help me in this, it would be very nice ! I am looking for it since last week, please help !
回答1:
import requests
from bs4 import BeautifulSoup as bs
import re
import pandas as pd
def main(url):
with requests.Session() as req:
data = []
for page in range(1, 11):
print(f"Extracting Page# {page}")
r = req.get(url.format(page))
soup = bs(r.content, 'html.parser')
names = [name.text for name in soup.select("h2.name.title.inbl")]
phone = [ph.group(1) for ph in re.finditer(
r'mainLine":"(\d+)', r.text)]
for x, y in zip(names, phone):
if y.startswith(("06", "07")):
data.append([x, y])
df = pd.DataFrame(data, columns=["Name", "Phone"])
print(df)
df.to_csv("data.csv", index=False)
print("Data Saved to data.csv")
main("https://www.118000.fr/search?part=1&who=kevin&page={}")
Output: View-Online
Sample:
来源:https://stackoverflow.com/questions/61470986/failing-to-create-the-data-frame-and-populating-its-data-into-the-csv-file-prope