python正则图片爬取

二次信任 提交于 2019-12-05 08:39:58
# coding:utf8
import os
import re
import time

import requests

if __name__ == "__main__":
    # Download every wallpaper thumbnail referenced by the topic page below
    # and store each one under ./tupian using the last path segment of its
    # URL as the file name.
    url = 'http://www.win4000.com/zt/qsmy.html'

    # Create the output directory up front; open() in 'wb' mode does not
    # create missing parent directories and would raise FileNotFoundError.
    output_dir = './tupian'
    os.makedirs(output_dir, exist_ok=True)

    # A timeout keeps a stalled server from hanging the script forever, and
    # raise_for_status() stops us from parsing an HTTP error page as if it
    # were the real listing.
    request_timeout = 10
    page_response = requests.get(url, timeout=request_timeout)
    page_response.raise_for_status()
    html = page_response.text

    # The data we want: the real image URL is held in the `data-original`
    # attribute, while `src` only points at a placeholder, e.g.
    # <img src="http://static.win4000.com/home/images/placeholder.jpg" data-original = "http://pic1.win4000.com/wallpaper/5/53bcec5b3235b_270_185.jpg" />
    pattern = r'<img src=".*?" data-original = "(.*?)" />'
    image_urls = re.findall(pattern, html)
    print(image_urls)

    for img_url in image_urls:
        print(img_url)
        try:
            img_response = requests.get(img_url, timeout=request_timeout)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            # One broken image must not abort the remaining downloads, and
            # an error body must not be written to disk as an image.
            print('图片%s下载失败: %s' % (img_url, exc))
        else:
            # [-1] instead of [1]: still yields a name when the URL
            # contains no '/' at all.
            file = img_url.rsplit('/', maxsplit=1)[-1]
            with open(os.path.join(output_dir, file), mode='wb') as fp:
                fp.write(img_response.content)
            print('图片%s保存成功!' % file)
        # Pause between requests so the server is not hammered.
        time.sleep(1)
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!