问题
I would like to zip several files that may total about 99 GB using Python. What is the most efficient way to do this with the zipfile library? This is the sample code I have:
# Write a ZIP archive straight into the gcs object named by zip_file_name,
# adding every file the visitor is allowed to see.
# (gcs is presumably the App Engine cloudstorage client -- confirm.)
with gcs.open(zip_file_name, 'w', content_type=b'application/zip') as f:
    # allowZip64 lets the archive grow past the classic ZIP size limits; it
    # is off by default on older Python versions.
    with zipfile.ZipFile(f, 'w', allowZip64=True) as z:
        for file in files:
            is_owner = (is_page_allowed_to_visitor(page, visitor)
                        or file.owner_id == visitor.id)
            if is_owner:
                file.show = True
            else:
                # Always assign file.show: a file is hidden when its
                # availability window has not started yet or is over.
                # Both bounds are checked, and a missing bound is ignored.
                now = datetime.now()
                not_yet_available = (bool(file.available_from)
                                     and file.available_from > now)
                no_longer_available = (bool(file.available_to)
                                       and file.available_to < now)
                file.show = not (not_yet_available or no_longer_available)

            if not file.show:
                continue

            file_name = "/%s/%s" % (gcs_store.get_bucket_name(), file.gcs_name)
            # The with-block closes the reader even if reading or writing
            # raises.
            with gcs.open(file_name, 'r') as gcs_reader:
                # NOTE: read() loads the whole object into memory before it
                # is added to the archive.
                z.writestr('%s-%s' % (file.created_on, file.name),
                           gcs_reader.read())
    # No explicit f.close(): the with-statements close the archive first
    # (writing its central directory) and then the underlying gcs file.
Some points to note:
1) I am using Google App Engine to host the files, so I cannot use the zipfile.write() method. I can only get the file contents as bytes.
Thanks in advance
回答1:
I have added a new method to the zipfile library. The enhanced library is open source and can be found on GitHub (EnhancedZipFile). The new method, writebuffered(), was inspired by zipfile.write() and zipfile.writestr(); it reads the data from an open file object in chunks:
def writebuffered(self, zinfo_or_arcname, file_pointer, file_size, compress_type=None):
    """Stream the contents of an open file object into the archive.

    The data is read from ``file_pointer`` in 8 KiB chunks, so the whole
    member never has to be held in memory.  The output stream is only
    written sequentially (never seeked), so the CRC and the sizes are
    emitted in a data descriptor after the member data instead of being
    patched into the local header.

    Args:
        zinfo_or_arcname: A ``ZipInfo`` describing the member, or the archive
            name to create a fresh ``ZipInfo`` for.
        file_pointer: Object with a ``read(size)`` method returning bytes.
        file_size: Expected uncompressed size.  It is only used to decide
            whether ZIP64 records are needed; the recorded size is the number
            of bytes actually read.
        compress_type: Optional compression method overriding the one of the
            ``ZipInfo`` / the archive default.

    Raises:
        RuntimeError: If the member was not written as ZIP64 but the data
            turned out to be larger than ``ZIP64_LIMIT``.
    """
    import struct  # local import: only needed for the data descriptor

    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() also copes with an empty name, unlike filename[-1].
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    # Honour the caller's explicit choice of compression method.
    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = file_size             # Uncompressed size (estimate)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    # General purpose flag bit 3: CRC and sizes follow the data in a data
    # descriptor, because the local header is never rewritten.
    zinfo.flag_bits |= 0x08

    self._writecheck(zinfo)
    self._didModify = True

    # Real values are filled in once all the data has been streamed.
    zinfo.CRC = 0
    zinfo.compress_size = 0
    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and \
        zinfo.file_size * 1.05 > ZIP64_LIMIT
    self.fp.write(zinfo.FileHeader(zip64))

    if zinfo.compress_type == ZIP_DEFLATED:
        # wbits=-15 produces a raw deflate stream, as the ZIP format expects.
        cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    else:
        cmpr = None

    checksum = 0
    compress_size = 0
    file_size = 0
    while True:
        buf = file_pointer.read(1024 * 8)
        if not buf:
            break
        file_size += len(buf)
        checksum = crc32(buf, checksum) & 0xffffffff
        if cmpr:
            buf = cmpr.compress(buf)
            compress_size += len(buf)
        self.fp.write(buf)

    if cmpr:
        buf = cmpr.flush()
        compress_size += len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = checksum
    zinfo.file_size = file_size

    # Without ZIP64 the descriptor only has 32-bit size fields, so oversized
    # data cannot be recorded correctly.
    if not zip64:
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Data descriptor: signature, CRC-32, compressed size, uncompressed size
    # (sizes are 8 bytes wide for ZIP64 members).
    descriptor_format = '<LLQQ' if zip64 else '<LLLL'
    self.fp.write(struct.pack(descriptor_format, 0x08074b50, zinfo.CRC,
                              zinfo.compress_size, zinfo.file_size))
    self.fp.flush()

    # Newer zipfile versions write the central directory at start_dir; keep
    # it pointing just past this member (unused, hence harmless, elsewhere).
    self.start_dir = self.fp.tell()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
Points to note
- I am a newbie in python so the code I wrote above may not be very optimized.
- Please contribute to the project on github here https://github.com/najela/EnhancedZipFile
来源:https://stackoverflow.com/questions/26849328/how-to-zip-a-very-large-file-in-python