How to compare the contents of two tarballs

后端 未结 11 841
北海茫月
北海茫月 2021-01-31 15:18

I want to tell whether two tarball files contain identical files, in terms of file name and file content, not including meta-data like date, user, group.

However, There

11条回答
  •  不知归路
    2021-01-31 15:57

    I had a similar problem and solved it with Python; here is the code. P.S. Although this code compares the contents of two zip archives, the approach is the same for tarballs. I hope it helps.

    import zipfile
    import os,md5
    import hashlib
    import shutil
    
    def decompressZip(zipName, dirName):
        """Extract every member of a zip archive and return the member names.

        Args:
            zipName: Path of the zip archive to read.
            dirName: Directory the members are extracted into.

        Returns:
            The list of member names reported by ``ZipFile.namelist()``
            (directory entries included).

        Raises:
            Whatever ``zipfile`` or the filesystem raises (for example
            ``IOError`` for a missing archive or ``zipfile.BadZipfile`` for a
            corrupt one). The error propagates with its original type and
            traceback; all of these are still ``Exception`` subclasses, so
            callers that catch ``Exception`` keep working.
        """
        # The context manager closes the archive even when extraction fails,
        # which the previous explicit close() call did not guarantee.
        with zipfile.ZipFile(zipName, "r") as archive:
            memberNames = archive.namelist()
            archive.extractall(dirName, memberNames)
        return memberNames
    
    def md5sum(filename):
        """Return the upper-case hexadecimal MD5 digest of a file's contents.

        Args:
            filename: Path of the file to hash; it is opened in binary mode.

        Returns:
            The 32-character MD5 hex digest, upper-cased.

        Raises:
            IOError/OSError: If the file cannot be opened or read.
        """
        md5obj = hashlib.md5()
        # ``with`` guarantees the handle is closed even if a read fails.
        with open(filename, "rb") as f:
            # Feed the digest in fixed-size chunks so large files are never
            # loaded into memory in one piece; the digest is unchanged.
            for chunk in iter(lambda: f.read(65536), b""):
                md5obj.update(chunk)
        return md5obj.hexdigest().upper()
    
    if __name__ == "__main__":
        # Compare ./old.zip against ./new.zip: extract both archives into
        # scratch directories, hash every extracted regular file, then print
        # the names that exist only in the new archive and the names whose
        # content hash differs between the two.
        #
        # NOTE: only names found in the new archive are examined, so files
        # that exist solely in the old archive are not reported.
        try:
            oldFileList = decompressZip("./old.zip", "./oldDir")
            newFileList = decompressZip("./new.zip", "./newDir")

            # Archive member name -> MD5 of the extracted file.
            oldDict = dict()
            newDict = dict()

            for oldFile in oldFileList:
                tmpOldFile = "./oldDir/" + oldFile
                # Directory entries have no content to hash.
                if not os.path.isdir(tmpOldFile):
                    oldDict[oldFile] = md5sum(tmpOldFile)

            for newFile in newFileList:
                tmpNewFile = "./newDir/" + newFile
                if not os.path.isdir(tmpNewFile):
                    newDict[newFile] = md5sum(tmpNewFile)

            additionList = list()
            modifyList = list()

            for key in newDict:
                if key not in oldDict:
                    additionList.append(key)
                elif newDict[key] != oldDict[key]:
                    modifyList.append(key)

            # Parenthesised single-argument form prints identically whether
            # ``print`` is a statement or a function.
            print("new file lis:%s" % additionList)
            print("modified file list:%s" % modifyList)
        finally:
            # Always remove the scratch directories, including after a
            # failure; ignore_errors covers a directory that was never
            # created (e.g. an archive with no members).
            shutil.rmtree("./oldDir", ignore_errors=True)
            shutil.rmtree("./newDir", ignore_errors=True)
    

提交回复
热议问题