爬取中国大学排名

早过忘川 提交于 2020-01-29 19:37:10
import requests
from bs4 import BeautifulSoup
import bs4
# Address of the ranking page that the script at the bottom of this file fetches.
url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
# Module-level accumulator; passed to getUniv(), which appends one row per
# university to it.
ulist = []


# Fetch the HTML page
def getHTML(url):
    """Download ``url`` and return the response body as text.

    The response encoding is overridden with the encoding that requests
    detects from the body (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request failed (connection
        error, timeout, invalid URL, or a non-2xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx responses into an exception
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures are treated as "no page"; a bare
        # ``except`` would also swallow KeyboardInterrupt and real bugs.
        print("异常")
        return ""

# Extract the ranking information
def getUniv(ulist, html):
    """Parse ``html`` and append one row per table row to ``ulist``.

    Each appended row is a list holding the ``.string`` of the first four
    ``<td>`` cells found in a child tag of the document's first ``<tbody>``.

    Args:
        ulist: List to extend in place.
        html: HTML text to parse; may be empty.

    Returns:
        The same ``ulist`` object. It is returned unchanged when the
        document has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")

    tbody = soup.tbody
    if tbody is None:
        # No table body at all, e.g. getHTML() returned "" after a failed
        # request; previously this raised AttributeError.
        return ulist

    for tr in tbody.children:
        # .children also yields text nodes (whitespace between rows);
        # only real tags can be table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 4:
            # Not a complete data row; indexing it would raise IndexError.
            continue
        ulist.append([td.string for td in tds[:4]])
    return ulist

# Print the ranking information
def readUniv(ulist):
    """Ask how many rows to show and print that many leading rows of ``ulist``.

    A header line is printed first, then one tab-separated, centre-aligned
    line per row. If the requested count exceeds ``len(ulist)``, every
    available row is printed; a count of zero or less prints no rows.

    Args:
        ulist: Sequence of rows; the first four items of each row are printed.

    Raises:
        ValueError: If the text typed at the prompt is not an integer.
    """
    # SECURITY: the input is parsed with int() instead of eval(); eval()
    # would execute any expression the user typed at the prompt.
    s = int(input("请输入要查询前多少名大学:"))
    print("前{}名的排名如下:".format(s))

    row_fmt = "{:^3}\t{:^20}\t{:^10}\t{:^5}"
    print(row_fmt.format("排名", "学校名称", "城市", "分数"))

    # A slice stops at the end of the list, so asking for more rows than
    # exist no longer raises IndexError. max() keeps a negative count
    # meaning "no rows" rather than "all but the last few".
    for u in ulist[:max(s, 0)]:
        print(row_fmt.format(u[0], u[1], u[2], u[3]))


# Script body: download the ranking page, parse its table rows into ulist,
# then prompt the user and print the requested number of rows.
html = getHTML(url)  # "" if the download failed
ulist = getUniv(ulist, html)  # getUniv returns the list it was given
readUniv(ulist)

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!