How to download specific Google Drive folder using Python?

后端 未结 4 553
面向向阳花
面向向阳花 2020-12-06 15:18

I'm trying to download specific folders from Google Drive.

I tried this example http://www.mwclearning.com/?p=1608 but it downloads all the files from Google Drive.

相关标签:
4条回答
  • 2020-12-06 15:53

    First download the 'client_id.json' file as described in the linked tutorial (follow steps 5-7).

    In the last line of the code, change "folder_id" to the ID of the folder you want to download from Drive. To find it, right-click the folder and enable link sharing; the ID is the part of the URL after "id=". Also change "savepath" to the local path where the downloaded folder should be saved.

    from __future__ import print_function
    
    from googleapiclient import discovery
    from httplib2 import Http
    from oauth2client import file, client, tools
    import os, io
    from apiclient.http import MediaFileUpload, MediaIoBaseDownload
    
    # OAuth scope requested for the Drive API calls made below.
    SCOPES = 'https://www.googleapis.com/auth/drive'
    # Credentials are looked up in storage.json first.
    store = file.Storage('storage.json')
    creds = store.get()
    # Nothing stored (or the stored credentials are flagged invalid): run the
    # OAuth flow built from client_id.json. The flow is handed `store`,
    # presumably so the new credentials are saved there -- verify against
    # the oauth2client documentation.
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('client_id.json', SCOPES)
        creds = tools.run_flow(flow, store)
    # Drive v3 client that sends requests through an authorized HTTP object.
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))
    
    def retaining_folder_structure(query,filepath):
        """Download every Drive item matching ``query`` into ``filepath``.

        Sub-folders are recreated locally and descended into recursively, so
        the Drive folder hierarchy is mirrored below ``filepath``.

        Args:
            query: Drive ``files.list`` search string, for example
                ``"'<folder id>' in parents"``.
            filepath: Existing local directory that receives the items.
        """
        page_token = None
        while True:
            # files.list is paginated: follow nextPageToken so that large
            # folders are not silently cut off after the first page.
            results = DRIVE.files().list(
                fields="nextPageToken, files(id, name, kind, mimeType)",
                q=query,
                pageToken=page_token).execute()
            for item in results.get('files', []):
                # Plain concatenation (not os.path.join) so that a Drive name
                # starting with '/' can never replace the target directory.
                path = filepath + '/' + item['name']
                if item['mimeType'] == 'application/vnd.google-apps.folder':
                    if not os.path.isdir(path):
                        os.mkdir(path)
                    retaining_folder_structure("'%s' in parents" % (item['id']), path)
                    continue
                request = DRIVE.files().get_media(fileId=item['id'])
                # The file is buffered in memory and written once complete.
                fh = io.BytesIO()
                downloader = MediaIoBaseDownload(fh, request)
                done = False
                while done is False:
                    status, done = downloader.next_chunk()
                    print("Download %d%%." % int(status.progress() * 100))
                with io.open(path, 'wb') as f:
                    f.write(fh.getvalue())
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    
    # Replace folder_id and savepath with your own values. The name must match
    # the function defined above (the misspelled name raised NameError).
    retaining_folder_structure("'folder_id' in parents",'savepath')
    
    0 讨论(0)
  • 2020-12-06 15:54

    Check the Google Drive API documentation; it includes the following sample code for downloading a file with Python.

    file_id = '0BwwA4oUTeiV1UVNwOHItT0xfa2M'
    request = drive_service.files().get_media(fileId=file_id)
    # Downloaded chunks are collected in an in-memory buffer.
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        # Function-call form: the bare print statement is a SyntaxError on Python 3.
        print("Download %d%%." % int(status.progress() * 100))
    

    For the folders part, you can check here on how to get it.

    For more information, you can check this tutorial and YT video.

    0 讨论(0)
  • 2020-12-06 15:54

    Use the credentials.json file downloaded for your Drive API project.

    from __future__ import print_function
    import pickle
    import os
    from googleapiclient.discovery import build
    from google_auth_oauthlib.flow import InstalledAppFlow
    from google.auth.transport.requests import Request
    from oauth2client import client
    from oauth2client import tools
    from oauth2client.file import Storage
    from apiclient.http import MediaFileUpload, MediaIoBaseDownload
    import io
    from apiclient import errors
    from apiclient import http
    import logging
    
    from apiclient import discovery
    
    # OAuth scope passed to the authorization flow in main().
    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive']
    
    
    # To list folders
    def listfolders(service, filid, des):
        """Recursively mirror the Drive folder ``filid`` into ``des``.

        Sub-folders are created below ``des`` and descended into; every other
        item is handed to ``downloadfiles``.

        Args:
            service: Drive v3 API client.
            filid: ID of the Drive folder whose children are listed.
            des: Existing local directory that receives the children.

        Returns:
            List of the direct children of ``filid`` (dicts with ``id``,
            ``name`` and ``mimeType``), across all result pages.
        """
        folder = []
        page_token = None
        while True:
            # files.list is paginated: keep requesting pages until the API
            # stops returning a nextPageToken, otherwise children beyond the
            # first page would be skipped.
            results = service.files().list(
                pageSize=1000, q="'" + filid + "'" + " in parents",
                pageToken=page_token,
                fields="nextPageToken, files(id, name, mimeType)").execute()
            folder.extend(results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break
        for item in folder:
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                target = des + "/" + item['name']
                if not os.path.isdir(target):
                    os.mkdir(target)
                print(item['name'])
                listfolders(service, item['id'], target)  # recurse until only files remain
            else:
                downloadfiles(service, item['id'], item['name'], des)
                print(item['name'])
        return folder
    
    
    # To Download Files
    def downloadfiles(service, dowid, name,dfilespath):
        """Fetch the Drive file ``dowid`` and save it as ``dfilespath``/``name``.

        The content is pulled chunk by chunk into an in-memory buffer, with a
        progress line printed per chunk, and written to disk once complete.
        """
        media_request = service.files().get_media(fileId=dowid)
        buffer = io.BytesIO()
        fetcher = MediaIoBaseDownload(buffer, media_request)
        finished = False
        while not finished:
            progress, finished = fetcher.next_chunk()
            print("Download %d%%." % int(progress.progress() * 100))
        target = dfilespath + "/" + name
        with io.open(target, 'wb') as out:
            out.write(buffer.getvalue())
    
    
    def main():
        """Download the children of a shared Drive folder into ./Folder.

        Loads cached OAuth credentials from token.pickle (refreshing them, or
        running the local-server login flow against credentials.json, when
        they are missing or not valid), lists the children of ``Folder_id``
        and mirrors them below a ``Folder`` directory in the current working
        directory.
        """
        creds = None
        # The file token.pickle stores the user's access and refresh tokens, and is
        # created automatically when the authorization flow completes for the first
        # time.
        # NOTE: unpickling runs code embedded in the file; only load a
        # token.pickle that this script wrote itself.
        if os.path.exists('token.pickle'):
            with open('token.pickle', 'rb') as token:
                creds = pickle.load(token)
        # If there are no (valid) credentials available, let the user log in.
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    'credentials.json', SCOPES)  # credentials.json download from drive API
                creds = flow.run_local_server()
            # Save the credentials for the next run
            with open('token.pickle', 'wb') as token:
                pickle.dump(creds, token)

        service = build('drive', 'v3', credentials=creds)

        Folder_id = "'PAST YOUR SHARED FOLDER ID'"  # Enter The Downloadable folder ID From Shared Link

        # files.list is paginated: collect every page, otherwise children
        # beyond the first page would be skipped.
        items = []
        page_token = None
        while True:
            results = service.files().list(
                pageSize=1000, q=Folder_id+" in parents", pageToken=page_token,
                fields="nextPageToken, files(id, name, mimeType)").execute()
            items.extend(results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break

        if not items:
            print('No files found.')
            return

        print('Files:')
        if not os.path.isdir("Folder"):
            os.mkdir("Folder")
        bfolderpath = os.getcwd()+"/Folder/"
        for item in items:
            # Every top-level item gets a directory named after it.
            itempath = bfolderpath + item['name']
            if not os.path.isdir(itempath):
                os.mkdir(itempath)
            if item['mimeType'] == 'application/vnd.google-apps.folder':
                listfolders(service, item['id'], itempath)
            else:
                # NOTE(review): a top-level file therefore ends up at
                # Folder/<name>/<name>. Layout kept as it was; confirm whether
                # saving directly into Folder/ was intended.
                downloadfiles(service, item['id'], item['name'], itempath)
    
    
    # Start the download only when run as a script, not when imported.
    if __name__ == '__main__':
        main()
    
    0 讨论(0)
  • 2020-12-06 16:15

    Here's just the code that deals specifically with downloading a folder recursively.

    I've tried to keep it to-the-point, omitting code that's described in tutorials already. I expect you to already have the ID of the folder that you want to download.

    The part elif not itemType.startswith('application/'): has the purpose of skipping any Drive-format documents. However, the check is overly-simplistic, so you might want to improve it or remove it.

    from __future__ import print_function
    import pickle
    import os.path
    import io
    from googleapiclient.discovery import build
    from googleapiclient.http import MediaIoBaseDownload
    from google_auth_oauthlib.flow import InstalledAppFlow
    from google.auth.transport.requests import Request
    
    # Read-only Drive scope (note the ".readonly" suffix). Presumably consumed
    # by getCredentials(), which is not shown in this snippet.
    # If modifying these scopes, delete the file token.pickle.
    SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
    
    def main():
        """Authorize against Drive and download one folder tree.

        Modelled on the quickStart.py example at
        https://developers.google.com/drive/api/v3/quickstart/python
        """
        drive = build('drive', 'v3', credentials=getCredentials())

        # Fill in the ID of the Drive folder to fetch and the local directory
        # that should receive it.
        sourceFolderId = ""
        targetDirectory = ""
        downloadFolder(drive, sourceFolderId, targetDirectory)
    
    
    def downloadFolder(service, fileId, destinationFolder):
        """Recursively download the Drive folder ``fileId``.

        Sub-folders are recreated below ``destinationFolder`` and descended
        into. Items whose MIME type starts with ``application/`` (other than
        folders) are reported as unsupported and skipped; everything else is
        handed to ``downloadFile``.

        Args:
            service: Drive v3 API client.
            fileId: ID of the Drive folder to download.
            destinationFolder: Local directory to fill; created, including
                missing parent directories, when it does not exist yet.
        """
        if not os.path.isdir(destinationFolder):
            os.makedirs(destinationFolder)

        items = []
        pageToken = None
        while True:
            # files.list is paginated: request nextPageToken and follow it so
            # folders with more than one page of children are fully listed.
            results = service.files().list(
                pageSize=300,
                # Documented query form: the ID on the left of "in parents".
                q="'{0}' in parents".format(fileId),
                pageToken=pageToken,
                fields="nextPageToken, files(id, name, mimeType)"
                ).execute()
            items.extend(results.get('files', []))
            pageToken = results.get('nextPageToken')
            if not pageToken:
                break

        for item in items:
            itemName = item['name']
            itemId = item['id']
            itemType = item['mimeType']
            filePath = destinationFolder + "/" + itemName

            if itemType == 'application/vnd.google-apps.folder':
                print("Stepping into folder: {0}".format(filePath))
                downloadFolder(service, itemId, filePath) # Recursive call
            elif not itemType.startswith('application/'):
                downloadFile(service, itemId, filePath)
            else:
                print("Unsupported file: {0}".format(itemName))
    
    
    def downloadFile(service, fileId, filePath):
        """Stream the Drive file ``fileId`` to the local path ``filePath``.

        The directory that will contain ``filePath`` must already exist.
        """
        print("-> Downloading file with id: {0} name: {1}".format(fileId, filePath))
        mediaRequest = service.files().get_media(fileId=fileId)

        # The context manager closes the handle even when a chunk fails.
        with io.FileIO(filePath, mode='wb') as target:
            fetcher = MediaIoBaseDownload(target, mediaRequest, chunksize=1024*1024)

            finished = False
            while not finished:
                progress, finished = fetcher.next_chunk(num_retries = 2)
                if progress:
                    print("Download %d%%." % int(progress.progress() * 100))
            print("Download Complete!")
    
    0 讨论(0)
提交回复
热议问题