代码小测试

我的未来我决定 提交于 2020-02-08 01:43:39
from  bs4  import BeautifulSoup
from lxml import html,etree
file = 'hm.html'
# Read the saved page inside a context manager so the file handle is closed
# as soon as its contents are in memory.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
#a=soup.text
# Text of the first <div class="p">; raises IndexError if no such div exists.
a = soup.find_all(name='div', attrs={"class": "p"})[0].text
#a = soup.select('')

#print(a)  # everything above scrapes the page content


#网页的url进行爬取
from  bs4  import BeautifulSoup
from lxml import html,etree
file = 'hm.html'
# Read the saved page inside a context manager so the file handle is closed
# as soon as its contents are in memory.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
#a = soup.find_all(name='li',attrs={"class":"last"})
#a = soup.a.attrs['href']
# Print the href of the <a> tag at index 32 (the 33rd anchor in the page).
# NOTE(review): the index is presumably specific to hm.html's layout; an
# IndexError is raised when the page has fewer anchors.
print(soup.select('a')[32]['href'])

#获取标题

from  bs4  import BeautifulSoup
from lxml import html,etree
file = 'hm.html'
# Read the saved page inside a context manager so the file handle is closed
# as soon as its contents are in memory.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Text of the first <div class="chapter_update_time">; raises IndexError if
# the page contains no such div.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

#全局变量的使用
def ja(a, b):
    """Return a + b and also store the result in the module-level name c.

    Args:
        a: Left operand.
        b: Right operand; must support ``a + b``.

    Returns:
        The value of ``a + b``, which is the same object bound to global c.
    """
    global c
    total = a + b
    # Publish the result globally so callers can read it without the return.
    c = total
    return total

def main():
    """Call ja(1, 2) and print the module-level c that the call assigns."""
    first, second = 1, 2
    # The return value is not needed here: ja stores the sum in the global c.
    ja(first, second)

    print(c)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

#bs4清洗数据的小练习
from  bs4  import BeautifulSoup
from lxml import html,etree
file = 'hm.html'
# Read the saved page inside a context manager so the file handle is closed
# as soon as its contents are in memory.
with open(file, 'r', encoding='utf-8') as htmlfile:
    htmlhandle = htmlfile.read()
soup = BeautifulSoup(htmlhandle, features='lxml')
# Text of the first <div class="chapter_update_time">; raises IndexError if
# the page contains no such div.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

from bs4 import BeautifulSoup
file = 'hm.html'
# htmlfile holds the page text (a str), not a file object. Reading inside a
# context manager ensures the underlying handle is closed right away.
with open(file, 'r', encoding='utf-8') as _page:
    htmlfile = _page.read()
soup = BeautifulSoup(htmlfile, 'lxml')
# Text of the first <div class="chapter_update_time">; raises IndexError if
# the page contains no such div.
a = soup.find_all(name='div', attrs={"class": "chapter_update_time"})[0].text
print(a)

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!