代码:
import io
import re
import sys

import requests
from bs4 import BeautifulSoup

# Re-wrap stdout so that printing names containing characters outside the
# console's default code page does not raise UnicodeEncodeError.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Exact ``class`` attribute value of the <div> elements that hold the names.
NAME_DIV_CLASS = (
    'babynology_textevidence babynology_bg_grey babynology_shadow '
    'babynology_radius left overflow_scroll'
)


def html_save(s):
    """Append ``s`` followed by a newline to ``Name.csv``.

    The file is written as UTF-8 explicitly, so a name containing
    characters outside the platform's default codec cannot make the write
    fail with UnicodeEncodeError.
    """
    with open('Name.csv', 'a', encoding='utf-8') as f:
        f.write(s + '\n')


def _clean_name(text):
    """Return ``text`` with every whitespace character removed.

    ``str.split()`` without arguments splits on any Unicode whitespace
    (spaces, non-breaking spaces, tabs, newlines), so joining the pieces
    drops all of it in one pass.
    """
    return ''.join(text.split())


def getName_link():
    """Print and save the text of the first link of each name entry.

    Parses the local file ``Girl.html``, visits every ``<strong>`` inside
    the ``<div>`` elements whose class attribute equals ``NAME_DIV_CLASS``,
    prints the whitespace-stripped text of the first ``<a>`` found there
    and appends it to ``Name.csv`` through ``html_save``.

    Returns:
        None.
    """
    # Binary mode lets BeautifulSoup detect the document encoding itself
    # instead of relying on the locale default; ``with`` closes the handle.
    with open('Girl.html', 'rb') as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')

    for div in soup.find_all('div', {'class': NAME_DIV_CLASS}):
        for strong in div.find_all('strong'):
            anchor = strong.find('a')
            if anchor is None:
                # A <strong> without a link carries no name; skip it
                # rather than fail on an empty result list.
                continue
            name = _clean_name(anchor.text)
            print(name)
            html_save(name)


getName_link()
运行结果: