学了这么久,得到了一些经验与分享:
其中:函数open1()访问指定URL的网页;
函数getp()访问下图中的小姐姐写真标签,深蓝色部分即是爬取的标签数;
函数find()爬取写真照的URL
函数save()将指定URL的写真爬取到指定的文件夹里
主函数down()
import urllib.request
import os
def open1(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header.

    Args:
        url: Address of the resource to download.

    Returns:
        bytes: The undecoded response body.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    rep = urllib.request.Request(url)
    rep.add_header(
        'User-Agent',
        ' Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0',
    )
    # The response is a context manager; closing it here releases the
    # underlying connection instead of leaving that to garbage collection.
    with urllib.request.urlopen(rep) as r:
        return r.read()
def getp(url):
    """Return the bracketed value that follows "current-comment-page".

    The page at ``url`` is downloaded with ``open1`` and decoded as UTF-8.
    The result is the text between the marker (plus a fixed 3-character
    gap) and the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        str: The text found between the marker and the closing ``]``.

    Raises:
        ValueError: If the marker or the closing ``]`` is not in the page.
    """
    h = open1(url).decode('utf-8')
    marker = "current-comment-page"
    pos = h.find(marker)
    if pos == -1:
        # Without this check the -1 from find() would be turned into offset
        # 22 and an arbitrary slice of the page would be returned.
        raise ValueError('"current-comment-page" marker not found in ' + url)
    # 23 = len(marker) + 3; presumably skips the `">[` that follows the
    # class name in the markup -- TODO confirm against the live page.
    a = pos + 23
    b = h.find(']', a)
    if b == -1:
        raise ValueError('no closing "]" after the page marker in ' + url)
    return h[a:b]
def find(url):
    """Collect the ``.jpg`` image addresses referenced by the page at ``url``.

    The page is downloaded with ``open1``, decoded as UTF-8 and scanned for
    every ``img src=`` occurrence. For each one, the attribute value up to
    and including the first ``.jpg`` is recorded, provided that ``.jpg``
    appears within 255 characters of the tag and before the closing quote.

    Args:
        url: Address of the page to scan.

    Returns:
        list[str]: Image addresses in document order, exactly as written in
        the page (they are not made absolute here).
    """
    h = open1(url).decode('utf-8')
    tu = []
    a = h.find('img src=')
    while a != -1:
        start = a + 9  # first character after `img src=` and its opening quote
        quote = h[a + 8:start]
        # Bound the search by the closing quote so a non-jpg image followed
        # by some later ".jpg" is not glued into one bogus address.
        end = h.find(quote, start) if quote in ('"', "'") else -1
        limit = a + 255 if end == -1 else min(a + 255, end)
        b = h.find('.jpg', a, limit)
        if b != -1:
            tu.append(h[start:b + 4])
        else:
            # No jpg in this tag: resume scanning right after its prefix.
            b = start
        a = h.find('img src=', b)
    return tu
def save(f, image):
    """Download every address in ``image`` into the current directory.

    Each file is named after the last ``/``-separated component of its
    address. Addresses without an explicit scheme are prefixed with
    ``http:``.

    Args:
        f: Not used by this function; files are written relative to the
            current working directory.
        image: Iterable of image addresses to download.
    """
    for i in image:
        a = i.split('/')[-1]
        # Only scheme-less addresses need the "http:" prefix; prepending it
        # to an absolute URL would produce an invalid address.
        src = i if i.startswith(('http://', 'https://')) else 'http:' + i
        # Download first so a failed request does not leave an empty file.
        img = open1(src)
        with open(a, 'wb') as f1:
            f1.write(img)
def down(f='爬虫', p=15):
    """Download the images of ``p`` listing pages into folder ``f``.

    The folder is created if needed and becomes the current working
    directory; it stays the working directory after the call returns.

    Args:
        f: Destination folder for the downloaded images.
        p: Number of listing pages to visit.

    Raises:
        ValueError: If the value returned by ``getp`` for the landing page
            is not an integer.
    """
    # exist_ok lets the script be re-run without failing on the folder
    # left behind by a previous run.
    os.makedirs(f, exist_ok=True)
    os.chdir(f)
    url = 'http://jandan.net/ooxx'
    # NOTE(review): pn is never read below. The call is kept so an
    # unparsable landing page still aborts before anything is downloaded.
    pn = int(getp(url))
    for i in range(p):
        # Page tokens are a fixed prefix plus one letter, starting at 'a'.
        purl = url + '/MjAyMDAyMjktMjA' + chr(97 + i) + '#comments'
        image = find(purl)
        save(f, image)
# Run the downloader with its default folder and page count when the file
# is executed as a script.
if __name__ == '__main__':
    down()
在这里插入代码片
来源:CSDN
作者:鸟随二月
链接:https://blog.csdn.net/feiqipengcheng/article/details/104578975