爬取 B 站热门视频的 BV 号
爬取首页 HTML
const axios = require('axios')
// Page to download.
const url = 'https://www.bilibili.com/'

// Browser-like request headers sent along with the GET request.
const headers = {
  "accept": "*/*",
  "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
  "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
  "sec-fetch-dest": "empty",
  "sec-fetch-mode": "cors",
  "sec-fetch-site": "same-site",
  // The HTTP request header is spelled "referer" (single "r" in the middle);
  // "referrer" is the name of the fetch() option, not of the header.
  "referer": "https://www.bilibili.com",
  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
}

// Start the request; `resp` is the pending promise returned by axios.
const resp = axios.get(url, {headers})
resp.then(
  ({data}) => {
    // Print the response body.
    console.log(data)
  }
).catch(
  (err) => {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(`Failed to fetch ${url}: ${err.message}`)
    process.exitCode = 1
  }
)
根据 DOM 结构，提取 HTML 中的链接内容
const $ = require('cheerio')
const fs = require('fs')
// Location of the HTML file to parse (a relative path, resolved against the
// current working directory). Presumably a saved copy of the page fetched
// earlier -- confirm.
const path = './t.html'
// Read the entire file synchronously and decode it as a UTF-8 string.
const html = fs.readFileSync(path, 'utf8')
/**
 * Extract the last path segment of every link matched by `div.info-box>a`.
 *
 * For each matched anchor the `href` is reduced to its path (query string and
 * fragment removed, trailing slashes trimmed) and the final `/`-separated
 * segment is returned. Anchors without an `href` are skipped.
 *
 * @param {string} html - HTML markup to parse.
 * @returns {Promise<string[]>} The extracted segments, in document order.
 */
async function getList(html) {
  const dom = $.load(html)
  const links = dom('div.info-box>a')
  return links.map(
    (i, el) => {
      const href = el.attribs.href
      if (!href) {
        // Returning null makes cheerio's map() leave this element out
        // instead of crashing on `undefined.split`.
        return null
      }
      // Keep only the path: drop "?query" / "#hash", then any trailing "/".
      const pathOnly = href.split(/[?#]/, 1)[0].replace(/\/+$/, '')
      return pathOnly.split('/').pop()
    }
  ).toArray()
}
/**
 * Script entry point: extract the list from the loaded HTML and print it.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(await getList(html))
}

// Do not leave the promise floating: report any failure and mark the
// process as failed.
main().catch(
  (err) => {
    console.error(err)
    process.exitCode = 1
  }
)
获取结果
[
'BV1DK4y147RU',
'BV1vK4y147H1',
'BV1cC4y1Y7JD',
'BV1dD4y1D7HJ',
'BV1Ft4y197pr',
'BV1yC4y187Pp',
'BV1TD4y1Q7xQ',
'BV11a4y1h7wd',
'BV14g4y1q7du',
'BV1Yz411e7Rb'
]
来源:oschina
链接:https://my.oschina.net/ahaoboy/blog/4324692