How do I download a file over HTTP using Python?

前端 未结 25 2980
感动是毒
感动是毒 2020-11-21 07:17

I have a small utility that downloads an MP3 file from a website on a schedule and then builds/updates a podcast XML file which I've added to iTunes.

The te

相关标签:
25条回答
  • 2020-11-21 07:52

    An improved version of the PabloG code for Python 2/3:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    from __future__ import ( division, absolute_import, print_function, unicode_literals )
    
    import sys, os, tempfile, logging
    
    if sys.version_info >= (3,):
        import urllib.request as urllib2
        import urllib.parse as urlparse
    else:
        import urllib2
        import urlparse
    
    def download_file(url, dest=None):
        """
        Download the resource at ``url`` and save it to a local file.

        The local file name is the base name of the URL path; when the path
        has no base name (for example it ends in ``/``) the name
        ``downloaded.file`` is used instead. The body is copied in 8192-byte
        blocks and a running byte count (plus a percentage when the server
        sent a non-zero Content-Length) is printed to stdout.

        :param url: URL to fetch, passed unchanged to ``urllib2.urlopen``.
        :param dest: optional directory to save into; when falsy the file
            name is used as is (relative to the current working directory).
        :returns: the path of the file that was written.
        """
        u = urllib2.urlopen(url)
        # The response keeps a connection/file handle open; release it even
        # when URL parsing, file creation or the copy loop raises.
        try:
            filename = os.path.basename(urlparse.urlsplit(url).path)
            if not filename:
                filename = 'downloaded.file'
            if dest:
                filename = os.path.join(dest, filename)

            with open(filename, 'wb') as f:
                meta = u.info()
                # Python 2 header objects offer getheaders(), Python 3 ones get_all().
                meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
                meta_length = meta_func("Content-Length")
                # Stays None when the header is absent, which disables the percentage.
                file_size = None
                if meta_length:
                    file_size = int(meta_length[0])
                print("Downloading: {0} Bytes: {1}".format(url, file_size))

                file_size_dl = 0
                block_sz = 8192
                while True:
                    chunk = u.read(block_sz)
                    if not chunk:
                        break

                    file_size_dl += len(chunk)
                    f.write(chunk)

                    status = "{0:16}".format(file_size_dl)
                    if file_size:
                        status += "   [{0:6.2f}%]".format(file_size_dl * 100 / file_size)
                    # Carriage return so the next status overwrites this one.
                    status += chr(13)
                    print(status, end="")
                # Finish the status line once the copy is complete.
                print()
        finally:
            u.close()

        return filename
    
    if __name__ == "__main__":
        # Manual smoke test: runs only when this file is executed as a script,
        # never on import.
        url = "http://download.thinkbroadband.com/10MB.zip"
        print("Testing with 10MB download")
        filename = download_file(url)
        # Show where the download was saved.
        print(filename)
    
    0 讨论(0)
  • 2020-11-21 07:53

    In python3 you can use urllib3 and shutil libraries. Download them by using pip or pip3 (depending on whether python3 is the default or not)

    pip3 install urllib3 shutil
    

    Then run this code

    import urllib.request
    import shutil
    
    # Source URL and the local file name the download is saved under.
    url = "http://www.somewebsite.com/something.pdf"
    output_file = "save_this_name.pdf"
    # Open the response first, then the destination file in binary mode;
    # both are closed automatically when their blocks exit.
    with urllib.request.urlopen(url) as response:
        with open(output_file, 'wb') as out_file:
            # Copy from the response to the file object.
            shutil.copyfileobj(response, out_file)
    

    Note that you download urllib3 but use urllib in code

    0 讨论(0)
  • 2020-11-21 07:53

    If you have wget installed, you can use parallel_sync.

    pip install parallel_sync

    from parallel_sync import wget
    # URLs to fetch in one call.
    urls = ['http://something.png', 'http://somthing.tar.gz', 'http://somthing.zip']
    # NOTE(review): the first argument looks like the target directory and the
    # second the URL(s) to fetch -- confirm against the parallel_sync docs.
    wget.download('/tmp', urls)
    # or a single file:
    # NOTE(review): presumably `filenames` sets the saved name and `extract`
    # unpacks the archive after download -- verify before relying on it.
    wget.download('/tmp', urls[0], filenames='x.zip', extract=True)
    

    Doc: https://pythonhosted.org/parallel_sync/pages/examples.html

    This is pretty powerful. It can download files in parallel, retry upon failure, and it can even download files on a remote machine.

    0 讨论(0)
  • 2020-11-21 07:54
    import urllib2
    from contextlib import closing

    # Python 2 only (urllib2). The response object is not a context manager in
    # Python 2, so closing() is used to make sure the connection is released
    # once the body has been written out.
    with closing(urllib2.urlopen("http://www.example.com/songs/mp3.mp3")) as mp3file:
      with open('test.mp3','wb') as output:
        # read() with no argument pulls the whole body into memory before writing.
        output.write(mp3file.read())
    

    The wb in open('test.mp3','wb') opens a file (and erases any existing file) in binary mode so you can save data with it instead of just text.

    0 讨论(0)
  • 2020-11-21 07:56

    Python 3

    • urllib.request.urlopen

      import urllib.request
      # Open the URL, then read the entire response body in one call.
      response = urllib.request.urlopen('http://www.example.com/')
      html = response.read()
      
    • urllib.request.urlretrieve

      import urllib.request
      # First argument is the URL to fetch, second the local file name to save it as.
      urllib.request.urlretrieve('http://www.example.com/songs/mp3.mp3', 'mp3.mp3')
      

      Note: According to the documentation, urllib.request.urlretrieve is a "legacy interface" and "might become deprecated in the future" (thanks gerrit)

    Python 2

    • urllib2.urlopen (thanks Corey)

      import urllib2
      # Python 2: open the URL, then read the entire response body in one call.
      response = urllib2.urlopen('http://www.example.com/')
      html = response.read()
      
    • urllib.urlretrieve (thanks PabloG)

      import urllib
      # Python 2: first argument is the URL to fetch, second the local file name to save it as.
      urllib.urlretrieve('http://www.example.com/songs/mp3.mp3', 'mp3.mp3')
      
    0 讨论(0)
  • 2020-11-21 07:57

    I wrote the following, which works in vanilla Python 2 or Python 3.


    import sys
    try:
        import urllib.request
        python3 = True
    except ImportError:
        import urllib2
        python3 = False
    
    
    def progress_callback_simple(downloaded,total):
        """Write a single-line progress report to stdout.

        The line starts with a carriage return so each call overwrites the
        previous report. ``downloaded`` is right-aligned to the width of
        ``total`` so the line does not jitter as the count grows.

        :param downloaded: number of bytes received so far.
        :param total: expected number of bytes; ``0`` (empty file) is reported
            as 100%, and ``None`` (size unknown) prints the count followed by
            ``/?`` with no percentage.
        """
        if total is None:
            # Size unknown: no width to align to and no percentage to compute.
            line = "\r%d/?" % downloaded
        else:
            # An empty download is complete by definition; this also avoids
            # dividing by zero.
            percent = 100.0*float(downloaded)/float(total) if total else 100.0
            line = (
                "\r" +
                str(downloaded).rjust(len(str(total))) + "/%d"%total +
                " [%3.2f%%]"%percent
            )
        sys.stdout.write(line)
        # No newline is written, so flush to make the update visible immediately.
        sys.stdout.flush()
    
    def download(srcurl, dstfilepath, progress_callback=None, block_size=8192):
        """Download ``srcurl`` into the file ``dstfilepath``.

        :param srcurl: URL to fetch.
        :param dstfilepath: path of the local file to create or overwrite.
        :param progress_callback: optional callable invoked as
            ``progress_callback(downloaded, total)``: once with ``0`` before
            any data is read and again after every write. ``total`` is the
            Content-Length reported by the server, or ``None`` when the
            server did not send one.
        :param block_size: number of bytes to read per iteration; ``None``
            reads the whole body with a single ``read()`` call.
        """
        def _parse_length(raw):
            # A missing header means the size is unknown rather than an error.
            return None if raw is None else int(raw)

        def _download_helper(response, out_file, file_size):
            if progress_callback is not None: progress_callback(0,file_size)
            if block_size is None:
                data = response.read()
                out_file.write(data)

                if progress_callback is not None:
                    # With no declared size, report the byte count actually received.
                    done = len(data) if file_size is None else file_size
                    progress_callback(done,file_size)
            else:
                file_size_dl = 0
                while True:
                    chunk = response.read(block_size)
                    if not chunk: break

                    file_size_dl += len(chunk)
                    out_file.write(chunk)

                    if progress_callback is not None: progress_callback(file_size_dl,file_size)
        with open(dstfilepath,"wb") as out_file:
            if python3:
                with urllib.request.urlopen(srcurl) as response:
                    file_size = _parse_length(response.headers.get("Content-Length"))
                    _download_helper(response,out_file,file_size)
            else:
                response = urllib2.urlopen(srcurl)
                # Python 2 responses are not context managers; close explicitly
                # so the connection is released even if the copy fails.
                try:
                    lengths = response.info().getheaders("Content-Length")
                    file_size = _parse_length(lengths[0] if lengths else None)
                    _download_helper(response,out_file,file_size)
                finally:
                    response.close()
    
    import traceback
    try:
        download(
            "https://geometrian.com/data/programming/projects/glLib/glLib%20Reloaded%200.5.9/0.5.9.zip",
            "output.zip",
            progress_callback_simple
        )
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the script instead of being trapped here.
        traceback.print_exc()
        # Presumably pauses so the traceback stays visible in a console window
        # that would otherwise close on exit.
        input()
    

    Notes:

    • Supports a "progress bar" callback.
    • Download is a 4 MB test .zip from my website.
    0 讨论(0)
提交回复
热议问题