要下载裘宗燕老师的《数据结构与算法(Python 语言)》讲义,可以使用下面两种方法:
- 使用正则表达式
#!/usr/bin/env python
"""Download every PDF linked from the courseware index page (regex version)."""
import re
import urllib.request as request
from urllib.parse import urljoin

baseurl = 'http://www.math.pku.edu.cn/teachers/qiuzy/ds_python/courseware/'

# Capture only the value of an href attribute ending in ".pdf".
# The dot is escaped and the match is anchored to href="...", so link text
# such as ">lecture.pdf" between tags is no longer picked up as a file name.
PDF_HREF = re.compile(r'href\s*=\s*["\']([^"\']*\.pdf)["\']', re.I)


def find_pdf_links(html):
    """Return the distinct PDF hrefs found in *html*, in first-seen order.

    Args:
        html: The page markup as a string.

    Returns:
        A list of href values (as written in the page) whose value ends in
        ".pdf", compared case-insensitively. Duplicates are dropped so each
        file is downloaded only once.
    """
    return list(dict.fromkeys(PDF_HREF.findall(html)))


def main():
    """Fetch the index page and download each linked PDF into the cwd."""
    with request.urlopen(baseurl) as response:
        # errors='replace' keeps a stray non-GB2312 byte from aborting the run.
        html = response.read().decode('gb2312', errors='replace')
    for name in find_pdf_links(html):
        # urljoin handles both relative and absolute hrefs; the local file
        # name is the last path segment so hrefs containing "/" still save.
        request.urlretrieve(urljoin(baseurl, name), name.rsplit('/', 1)[-1])
        print('下载{}成功'.format(name))


if __name__ == '__main__':
    main()
- 使用 Beautiful Soup
#!/usr/bin/env python
"""Download every PDF linked from the courseware index page (bs4 version)."""
import urllib.request as request
from urllib.parse import urljoin

import bs4

baseurl = 'http://www.math.pku.edu.cn/teachers/qiuzy/ds_python/courseware/'


def main():
    """Fetch the index page and download each linked PDF into the cwd."""
    with request.urlopen(baseurl) as response:
        html = response.read()
    soup = bs4.BeautifulSoup(html, 'lxml')
    # href=True skips anchors that have no href attribute; without it
    # link.get('href') is None and the membership test raises TypeError.
    for link in soup.find_all('a', href=True):
        name = link['href']
        # Match on the extension instead of the substring "pdf", so links
        # like "pdf-notes.html" are not downloaded by mistake.
        if not name.lower().endswith('.pdf'):
            continue
        # urljoin handles both relative and absolute hrefs; the local file
        # name is the last path segment so hrefs containing "/" still save.
        request.urlretrieve(urljoin(baseurl, name), name.rsplit('/', 1)[-1])
        print('download {} success'.format(name))


if __name__ == '__main__':
    main()
来源:oschina
链接:https://my.oschina.net/u/347219/blog/638577