问题
I have lots of zipped files on a Linux server and each file includes multiple text files.
what I want is to extract some of those text files, which have the same name across zipped files and save it a folder; I am creating one folder for each zipped file and extract the text file to it. I need to add the parent zipped folder name to the end of file names and save all text files in one directory. For example, if the zipped folder was March132017.zip and I extracted holding.txt, my filename would be holding_march13207.txt.
My problem is that I am not able to change the extracted file's name. I would appreciate if you could advise.
import os
import sys
import zipfile
os.chdir("/feeds/lipper/emaxx")
pwkwd = "/feeds/lipper/emaxx"
for item in os.listdir(pwkwd): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_name = os.path.abspath(item) # get full path of files
fh = open(file_name, "rb")
zip_ref = zipfile.ZipFile(fh)
filelist = 'ISSUERS.TXT' , 'SECMAST.TXT' , 'FUND.TXT' , 'HOLDING.TXT'
for name in filelist :
try:
outpath = "/SCRATCH/emaxx" + "/" + os.path.splitext(item)[0]
zip_ref.extract(name, outpath)
except KeyError:
{}
fh.close()
回答1:
Why not just read the file in question and save it yourself instead of extracting? Something like:
import os
import zipfile
source_dir = "/feeds/lipper/emaxx" # folder with zip files
target_dir = "/SCRATCH/emaxx" # folder to save the extracted files
# Are you sure your files names are capitalized in your zip files?
filelist = ['ISSUERS.TXT', 'SECMAST.TXT', 'FUND.TXT', 'HOLDING.TXT']
for item in os.listdir(source_dir): # loop through items in dir
if item.endswith(".zip"): # check for ".zip" extension
file_path = os.path.join(source_dir, item) # get zip file path
with zipfile.ZipFile(file_path) as zf: # open the zip file
for target_file in filelist: # loop through the list of files to extract
if target_file in zf.namelist(): # check if the file exists in the archive
# generate the desired output name:
target_name = os.path.splitext(target_file)[0] + "_" + os.path.splitext(file_path)[0] + ".txt"
target_path = os.path.join(target_dir, target_name) # output path
with open(target_path, "w") as f: # open the output path for writing
f.write(zf.read(target_file)) # save the contents of the file in it
# next file from the list...
# next zip file...
回答2:
import zipfile
zipdata = zipfile.ZipFile('somefile.zip')
zipinfos = zipdata.infolist()
# iterate through each file
for zipinfo in zipinfos:
# This will do the renaming
zipinfo.filename = do_something_to(zipinfo.filename)
zipdata.extract(zipinfo)
Reference: https://bitdrop.st0w.com/2010/07/23/python-extracting-a-file-from-a-zip-file-with-a-different-name/
回答3:
You could simply run a rename after each file is extracted right? os.rename should do the trick.
zip_ref.extract(name, outpath)
parent_zip = os.path.basename(os.path.dirname(outpath)) + ".zip"
new_file_name = os.path.splitext(os.path.basename(name))[0] # just the filename
new_name_path = os.path.dirname(outpath) + os.sep + new_file_name + "_" + parent_zip
os.rename(outpath, new_namepath)
For the filename, if you want it to be incremental, simply start a count and for each file, go up by on.
count = 0
for file in files:
count += 1
# ... Do our file actions
new_file_name = original_file_name + "_" + str(count)
# ...
Or if you don't care about the end name you could always use something like a uuid.
import uuid
random_name = uuid.uuid4()
回答4:
outpath = '/SCRATCH/emaxx'
suffix = os.path.splitext(item)[0]
for name in filelist :
index = zip_ref.namelist().find(name)
if index != -1: # check the file exists in the zipfile
filename, ext = os.path.splitext(name)
zip_ref.filelist[index].filename = f'{filename}_{suffix}.{ext}' # rename the extracting file to the suffix file name
zip_ref.extract(zip_ref.filelist[index], outpath) # use the renamed file descriptor to extract the file
回答5:
I doubt this is possible to rename file during their extraction. What about renaming files once they are extracted ?
Relying on linux bash, you can achieve it in a one line :
os.system("find "+outpath+" -name '*.txt' -exec echo mv {} `echo {} | sed s/.txt/"+zipName+".txt/` \;")
So, first we search all txt files in the specified folder, then exec the renaming command, with the new name computed by sed.
Code not tested, i'm on windows now ^^'
来源:https://stackoverflow.com/questions/44079913/renaming-the-extracted-file-from-zipfile