How can I create a zip archive of a directory structure in Python?
For anyone else looking into this question who is trying to archive the very same directory their program lives in, and who ends up with very deep tree structures and with recursion because the zip file zips itself, try this.
It's a combination of Mark's answer and some extra checks to ensure that there's no recursive zipping of the zipfile itself, and no unnecessarily deep folder structures.
import os
import zipfile
def zipdir(path, ziph, ignored_directories, ignored_files):
    """Add the files found under ``path`` to an open zip archive.

    Args:
        path: Directory to walk with ``os.walk``.
        ziph: Open, writable ``zipfile.ZipFile`` handle.
        ignored_directories: Strings; a directory is skipped when any of them
            occurs as a substring of the walked directory path.
        ignored_files: Strings; a file is skipped when any of them occurs as a
            substring of its file name.

    Files are written under the path produced by joining the walked directory
    and the file name, so a relative ``path`` gives relative archive names.
    """
    for current_dir, _subdirs, filenames in os.walk(path):
        # The directory test does not depend on the file, so run it once per
        # directory instead of once per file.
        if any(fragment in current_dir for fragment in ignored_directories):
            continue
        for filename in filenames:
            if any(fragment in filename for fragment in ignored_files):
                continue
            ziph.write(os.path.join(current_dir, filename))
# directory containing this script (this is NOT necessarily the current working directory)
this_dir = os.path.dirname(os.path.abspath(__file__))
# sub-path, relative to this_dir, that the zip will be created in (build/archives).
# set it to '' if you want the zip file in the same directory as the script
zip_dest_dir = os.path.join('build', 'archives')
# absolute directory the archive is written to
dest_dir = os.path.join(this_dir, zip_dest_dir)
# make sure the directory the zip is actually written to exists. Create dest_dir
# (not the relative zip_dest_dir) so this also works when the script is started
# from another directory or when zip_dest_dir is ''.
os.makedirs(dest_dir, exist_ok=True)
# name the zip file: remember the file extension
zip_filename = 'zipped_directory.zip'
# zip file's path
zip_path = os.path.join(dest_dir, zip_filename)
# ignored directories: any directory whose walked path contains one of these strings
# is skipped, e.g. 'build' or 'node_modules'
ignored_dirs = []
# ignored files: any file whose name contains one of these strings is skipped.
# The archive itself is listed so the zip never tries to zip itself; add e.g.
# 'deploy.py' here to leave out the script as well.
ignored_files = [zip_filename]
# create the zipfile handle and zip the directory contents. ZIP_STORED means no
# compression; change it to e.g. zipfile.ZIP_DEFLATED if you need actual compression.
# The with-block closes the archive even if zipping fails part-way.
# '.' is the current working directory, which keeps the names inside the archive
# relative; run the script from the directory you want to archive.
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_STORED) as zipf:
    zipdir('.', zipf, ignored_dirs, ignored_files)
The resulting zip file should only include directories starting from the working directory: so no Users/user/Desktop/code/.../working_directory/.../etc. kind of file structure.
Well, after reading the suggestions I came up with a very similar way that works with 2.7.x without creating "funny" directory names (absolute-like names), and will only create the specified folder inside the zip.
Or just in case you needed your zip to contain a folder inside with the contents of the selected directory.
def zipDir(path, ziph):
    """
    Inserts directory (path) into zipfile instance (ziph).

    Every file below ``path`` is stored under a single top-level folder named
    after the directory, keeping its sub-directory structure, e.g.
    ``folder/sub/file.txt``.

    path -- directory to archive
    ziph -- open, writable zipfile.ZipFile instance
    """
    # abspath also normalises the path, so "." and trailing separators still
    # yield a real folder name.
    top_folder = os.path.basename(os.path.abspath(path))
    for root, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(root, file)
            # Build the archive name from the path relative to ``path`` so that
            # nested files keep their sub-folders (no flattening, no name
            # collisions). os.path.join avoids a hard-coded "\\" separator;
            # zipfile converts the OS separator to "/" itself.
            relative_path = os.path.relpath(full_path, path)
            ziph.write(full_path, os.path.join(top_folder, relative_path))
def makeZip(pathToFolder):
    """
    Creates a zip file with the specified folder.

    The archive is written to ``pathToFolder + 'file.zip'``; for a path without
    a trailing separator that is a sibling of the folder (e.g. ``data`` ->
    ``datafile.zip``).

    pathToFolder -- path of the folder to archive
    """
    zip_path = pathToFolder + 'file.zip'
    # The with-block closes the archive even if adding a file raises.
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipDir(pathToFolder, zipf)
    # Report the archive that was written, not the folder that was archived.
    print("Zip file saved to: " + zip_path)
# Example call: zip the given folder (Windows-style path; replace it with your own).
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )
For adding compression to the resulting zip file, check out this link.
You need to change:
zip = zipfile.ZipFile('Python.zip', 'w')
to
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)
Here's a modern approach, using pathlib, and a context manager. Puts the files directly in the zip, rather than in a subfolder.
def zip_dir(filename: str, dir_to_zip: pathlib.Path):
    """Zip the contents of ``dir_to_zip`` into the archive ``filename``.

    Files are stored relative to ``dir_to_zip``, so the archive contains the
    directory's contents directly instead of a wrapping subfolder.

    Args:
        filename: Path of the zip file to create (overwritten if it exists).
        dir_to_zip: Directory whose files, including those in all
            subdirectories, are added.
    """
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # The archive may live inside dir_to_zip; remember where it is so it
        # is never added to itself.
        archive_path = pathlib.Path(filename).resolve()
        # rglob('*') visits every entry in every subdirectory. (glob('**')
        # returns only directories before Python 3.13 but also files from
        # 3.13 on, so it cannot be combined with iterdir() reliably.)
        for file in dir_to_zip.rglob('*'):
            if not file.is_file():
                continue
            if file.resolve() == archive_path:
                continue
            # Name the entry relative to dir_to_zip. Stripping only the first
            # path component breaks for absolute or multi-component paths.
            zip_path = file.relative_to(dir_to_zip)
            # Pass strings so this also works on zipfile versions without
            # path-like support; zip entry names always use "/".
            zipf.write(str(file), zip_path.as_posix())
To add the contents of mydirectory
to a new zip file, including all files and subdirectories:
import os
import zipfile
# Open the archive in a with-block so it is closed (and its central directory
# written) even if adding an entry raises.
with zipfile.ZipFile("myzipfile.zip", "w") as zf:
    for dirname, subdirs, files in os.walk("mydirectory"):
        # Add the directory itself so that empty directories get an entry too.
        zf.write(dirname)
        for filename in files:
            zf.write(os.path.join(dirname, filename))
I have another code example that may help, using Python 3, pathlib and zipfile. It should work on any OS.
from pathlib import Path
import zipfile
from datetime import datetime
# strftime pattern for the date stamp: two-digit year, month, day
DATE_FORMAT = '%y%m%d'


def date_str():
    """Return the current local date formatted with DATE_FORMAT."""
    today = datetime.now()
    return today.strftime(DATE_FORMAT)
def zip_name(path):
    """Return an unused zip file name, as a string, for archiving ``path``.

    The name is ``<parent>/<folder>_<date>.zip``, placed next to the resolved
    folder. If that file already exists, ``_1``, ``_2``, ... is appended
    before the extension until a name is found that does not exist yet.
    """
    target = Path(path).resolve()
    folder = target.parents[0]

    def candidate(suffix=''):
        # Build one candidate name; the date is read again on every call.
        return '{}/{}_{}{}.zip'.format(folder, target.name, date_str(), suffix)

    result = candidate()
    counter = 1
    while Path(result).exists():
        result = candidate('_{}'.format(counter))
        counter += 1
    return result
def all_files(path):
    """Recursively yield every file and folder below ``path`` as a string.

    Each entry is the given ``path`` joined with the entry's location below
    it, so results are absolute only when ``path`` itself is absolute. A
    directory is yielded before its own contents.
    """
    for entry in Path(path).iterdir():
        entry_name = str(entry)
        yield entry_name
        if entry.is_dir():
            # Descend into the sub-directory and pass its entries through.
            yield from all_files(entry_name)
def zip_dir(path):
    """Create a zip archive of everything below ``path``.

    The archive name comes from ``zip_name(path)`` and every entry yielded by
    ``all_files(path)`` (files and folders) is added to it. Progress is
    printed to stdout.
    """
    zip_filename = zip_name(path)
    # The with-block closes the archive even if adding an entry raises, so the
    # file handle is not leaked.
    with zipfile.ZipFile(zip_filename, 'w') as zip_file:
        print('create:', zip_filename)
        for file in all_files(path):
            print('adding... ', file)
            zip_file.write(file)
# Script entry point: when run directly, zip the current directory.
if __name__ == '__main__':
    zip_dir('.')
    print('end!')