I want to tell whether two tarball files contain identical files, in terms of file name and file content, not including meta-data like date, user, group.
However, There
I have a similar question and i resolve it by python, here is the code. ps:although this code is used to compare two zipball's content,but it's similar with tarball, hope i can help you
import zipfile
import os,md5
import hashlib
import shutil
def decompressZip(zipName, dirName):
try:
zipFile = zipfile.ZipFile(zipName, "r")
fileNames = zipFile.namelist()
for file in fileNames:
zipFile.extract(file, dirName)
zipFile.close()
return fileNames
except Exception,e:
raise Exception,e
def md5sum(filename):
f = open(filename,"rb")
md5obj = hashlib.md5()
md5obj.update(f.read())
hash = md5obj.hexdigest()
f.close()
return str(hash).upper()
if __name__ == "__main__":
oldFileList = decompressZip("./old.zip", "./oldDir")
newFileList = decompressZip("./new.zip", "./newDir")
oldDict = dict()
newDict = dict()
for oldFile in oldFileList:
tmpOldFile = "./oldDir/" + oldFile
if not os.path.isdir(tmpOldFile):
oldFileMD5 = md5sum(tmpOldFile)
oldDict[oldFile] = oldFileMD5
for newFile in newFileList:
tmpNewFile = "./newDir/" + newFile
if not os.path.isdir(tmpNewFile):
newFileMD5 = md5sum(tmpNewFile)
newDict[newFile] = newFileMD5
additionList = list()
modifyList = list()
for key in newDict:
if not oldDict.has_key(key):
additionList.append(key)
else:
newMD5 = newDict[key]
oldMD5 = oldDict[key]
if not newMD5 == oldMD5:
modifyList.append(key)
print "new file lis:%s" % additionList
print "modified file list:%s" % modifyList
shutil.rmtree("./oldDir")
shutil.rmtree("./newDir")