Compress directory to tar.gz with Commons Compress

后端 未结 6 1365
终归单人心
终归单人心 2020-12-30 06:13

I'm running into a problem using the Commons Compress library to create a tar.gz of a directory. I have a directory structure as follows.

parent/
          


        
相关标签:
6条回答
  • 2020-12-30 06:32

    I haven't figured out exactly what was going wrong, but after scouring Google's caches I found a working example. Sorry for the tumbleweed!

    /**
     * Archives the hard-coded directory {@code parent/childDirToCompress/} into
     * {@code archive.tar.gz}, resolved against the current working directory.
     *
     * <p>The stream chain is opened in a try-with-resources statement, so every stream that
     * was successfully opened is closed (outermost wrapper first) even when opening a later
     * stream or writing an entry fails. The previous {@code finally} block dereferenced
     * streams that could still be {@code null}, replacing the real I/O error with a
     * {@code NullPointerException}, and skipped the remaining {@code close()} calls as soon
     * as one of them threw.
     *
     * @throws FileNotFoundException if the archive file cannot be created
     * @throws IOException if reading the directory or writing the archive fails
     */
    public void CreateTarGZ()
        throws FileNotFoundException, IOException
    {
        System.out.println(new File(".").getAbsolutePath());
        // NOTE(review): dirPath and tarGzPath are assigned without a declaration in this
        // method; presumably they are fields of the enclosing class -- confirm.
        dirPath = "parent/childDirToCompress/";
        tarGzPath = "archive.tar.gz";

        try (FileOutputStream fileOut = new FileOutputStream(new File(tarGzPath));
             BufferedOutputStream bufferedOut = new BufferedOutputStream(fileOut);
             GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(bufferedOut);
             TarArchiveOutputStream tarOut = new TarArchiveOutputStream(gzipOut)) {
            addFileToTarGz(tarOut, dirPath, "");
            // Only finish the archive on the success path, so a failure while adding entries
            // is not masked by a secondary error from finish().
            tarOut.finish();
        }
    }
    
    /**
     * Adds {@code path} to the archive under the entry name {@code base + <file name>}.
     * When the path is a directory, its children are added recursively with the directory's
     * entry name (plus {@code "/"}) as their base.
     *
     * @param tOut archive stream that receives the entries
     * @param path file or directory to add
     * @param base prefix prepended to the file name to form the entry name
     *             (the empty string for the root of the archive)
     * @throws IOException if the file cannot be read or the entry cannot be written
     */
    private void addFileToTarGz(TarArchiveOutputStream tOut, String path, String base)
        throws IOException
    {
        File f = new File(path);
        System.out.println(f.exists());
        String entryName = base + f.getName();
        TarArchiveEntry tarEntry = new TarArchiveEntry(f, entryName);
        tOut.putArchiveEntry(tarEntry);

        if (f.isFile()) {
            // Close the input stream deterministically: an unclosed stream per file
            // exhausts the process's file descriptors on large directory trees.
            try (FileInputStream in = new FileInputStream(f)) {
                IOUtils.copy(in, tOut);
            }
            tOut.closeArchiveEntry();
        } else {
            // A directory entry carries no data; close it before descending.
            tOut.closeArchiveEntry();
            File[] children = f.listFiles();
            if (children != null) {
                for (File child : children) {
                    System.out.println(child.getName());
                    addFileToTarGz(tOut, child.getAbsolutePath(), entryName + "/");
                }
            }
        }
    }
    
    0 讨论(0)
  • 2020-12-30 06:35

    I had to make some path-related adjustments to @merrick's solution to get it to work, perhaps because of the latest Maven dependencies. The currently accepted solution didn't work for me.

    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
    import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
    import org.apache.commons.io.FileUtils;
    import org.apache.commons.io.IOUtils;
    import org.apache.commons.io.filefilter.DirectoryFileFilter;
    import org.apache.commons.io.filefilter.RegexFileFilter;
    
    /**
     * Helper for packing a directory tree into a gzip-compressed tar archive.
     */
    public class TAR {

        /**
         * Writes the files found recursively under {@code inputDirectoryPath} to a tar.gz
         * archive at {@code outputPath}. Entry names are the file paths relative to the input
         * directory.
         *
         * <p>Each file is streamed into the archive and its input stream is closed as soon as
         * the entry is written, so neither memory use nor the number of open file handles
         * grows with the size or number of files.
         *
         * @param inputDirectoryPath directory whose contents are archived
         * @param outputPath location of the tar.gz file to create
         * @throws IOException if a file cannot be read or the archive cannot be written
         */
        public static void CreateTarGZ(String inputDirectoryPath, String outputPath) throws IOException {

            File inputFile = new File(inputDirectoryPath);
            File outputFile = new File(outputPath);

            try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile);
                    BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
                    GzipCompressorOutputStream gzipOutputStream = new GzipCompressorOutputStream(bufferedOutputStream);
                    TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(gzipOutputStream)) {

                tarArchiveOutputStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
                tarArchiveOutputStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

                List<File> files = new ArrayList<>(FileUtils.listFiles(
                        inputFile,
                        new RegexFileFilter("^(.*?)"),
                        DirectoryFileFilter.DIRECTORY
                ));

                // The base URI is the same for every file; compute it once.
                final java.net.URI baseUri = inputFile.toURI();

                for (File currentFile : files) {
                    String relativeFilePath = baseUri.relativize(
                            new File(currentFile.getAbsolutePath()).toURI()).getPath();

                    TarArchiveEntry tarEntry = new TarArchiveEntry(currentFile, relativeFilePath);
                    tarEntry.setSize(currentFile.length());

                    tarArchiveOutputStream.putArchiveEntry(tarEntry);
                    // Stream the content instead of loading the whole file into a byte array,
                    // and make sure the input stream is closed for every file.
                    try (FileInputStream fileInputStream = new FileInputStream(currentFile)) {
                        IOUtils.copy(fileInputStream, tarArchiveOutputStream);
                    }
                    tarArchiveOutputStream.closeArchiveEntry();
                }
                // No explicit close(): try-with-resources closes the archive stream.
            }
        }
    }
    

    Maven

            <!-- Apache Commons IO: supplies the FileUtils, IOUtils and file-filter classes imported by the code above -->
            <dependency>
                <groupId>commons-io</groupId>
                <artifactId>commons-io</artifactId>
                <version>2.6</version>
            </dependency>
    
            <!-- Apache Commons Compress: supplies the tar archive and gzip compressor streams imported by the code above -->
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-compress</artifactId>
                <version>1.18</version>
            </dependency>
    
    0 讨论(0)
  • 2020-12-30 06:39

    Check below for Apache commons-compress and File walker examples.

    This example compresses a directory into a tar.gz archive.

    /**
     * Packs the files below {@code source} into an archive named after the folder with a
     * {@code .tar.gz} suffix; the archive path is relative, so it is resolved against the
     * current working directory.
     *
     * <p>Symbolic links are skipped. A file that cannot be visited or archived is reported on
     * {@code System.err} and the walk continues with the next file.
     *
     * @param source directory to archive
     * @throws IOException if {@code source} is not a directory, or if opening, walking or
     *                     finishing the archive fails
     */
    public static void createTarGzipFolder(Path source) throws IOException {

        if (!Files.isDirectory(source)) {
            throw new IOException("Please provide a directory.");
        }

        // the archive takes its name from the folder being packed
        final String archiveName = source.getFileName().toString() + ".tar.gz";

        try (OutputStream rawOut = Files.newOutputStream(Paths.get(archiveName));
             BufferedOutputStream bufferedOut = new BufferedOutputStream(rawOut);
             GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(bufferedOut);
             TarArchiveOutputStream tarOut = new TarArchiveOutputStream(gzipOut)) {

            Files.walkFileTree(source, new SimpleFileVisitor<>() {

                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) {

                    // regular content only -- symbolic links are left out of the archive
                    if (!attributes.isSymbolicLink()) {

                        // entry name is the path relative to the archived folder
                        Path entryPath = source.relativize(file);

                        try {
                            TarArchiveEntry entry =
                                    new TarArchiveEntry(file.toFile(), entryPath.toString());
                            tarOut.putArchiveEntry(entry);
                            Files.copy(file, tarOut);
                            tarOut.closeArchiveEntry();
                            System.out.printf("file : %s%n", file);
                        } catch (IOException e) {
                            System.err.printf("Unable to tar.gz : %s%n%s%n", file, e);
                        }
                    }

                    return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFileFailed(Path file, IOException exc) {
                    System.err.printf("Unable to tar.gz : %s%n%s%n", file, exc);
                    return FileVisitResult.CONTINUE;
                }

            });

            tarOut.finish();
        }
    }
    

    This example extracts a tar.gz archive and guards against the Zip Slip attack.

    /**
     * Unpacks the tar.gz archive at {@code source} into the directory {@code target}.
     *
     * <p>Every entry's destination is obtained from {@code zipSlipProtect}. Directory entries
     * are created as directories; for any other entry the parent directory is created when
     * missing and the entry's bytes are copied to the destination, replacing an existing file.
     *
     * @param source tar.gz archive to read
     * @param target directory to extract into
     * @throws IOException if {@code source} does not exist, an entry is rejected, or reading
     *                     or writing fails
     */
    public static void decompressTarGzipFile(Path source, Path target)
            throws IOException {

        if (Files.notExists(source)) {
            throw new IOException("File doesn't exists!");
        }

        try (InputStream fileIn = Files.newInputStream(source);
             BufferedInputStream bufferedIn = new BufferedInputStream(fileIn);
             GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(bufferedIn);
             TarArchiveInputStream tarIn = new TarArchiveInputStream(gzipIn)) {

            for (ArchiveEntry entry = tarIn.getNextEntry();
                 entry != null;
                 entry = tarIn.getNextEntry()) {

                Path destination = zipSlipProtect(entry, target);

                if (entry.isDirectory()) {
                    Files.createDirectories(destination);
                    continue;
                }

                // the archive may list a file before (or without) its directory entry
                Path parentDir = destination.getParent();
                if (parentDir != null && Files.notExists(parentDir)) {
                    Files.createDirectories(parentDir);
                }

                // the tar stream is positioned on the current entry's data
                Files.copy(tarIn, destination, StandardCopyOption.REPLACE_EXISTING);
            }
        }
    }
    
        /**
         * Resolves an archive entry's name against the extraction directory and rejects
         * entries that would land outside of it (the "Zip Slip" path-traversal attack).
         *
         * <p>The containment check compares absolute, normalized paths on both sides.
         * Checking against the raw {@code targetDir} wrongly rejected every entry when the
         * directory was passed in a non-normalized form such as {@code "."} or
         * {@code "out/../dest"}, because {@link Path#startsWith(Path)} compares name elements
         * literally.
         *
         * @param entry archive entry whose name is resolved
         * @param targetDir directory the archive is being extracted into
         * @return {@code targetDir.resolve(entry.getName())} in normalized form
         * @throws IOException if the entry resolves to a location outside {@code targetDir}
         */
        private static Path zipSlipProtect(ArchiveEntry entry, Path targetDir)
            throws IOException {

            Path normalizePath = targetDir.resolve(entry.getName()).normalize();

            // Compare like with like: make both the base and the candidate absolute and
            // normalized before testing containment.
            Path baseDir = targetDir.toAbsolutePath().normalize();
            Path candidate = normalizePath.toAbsolutePath().normalize();

            if (!candidate.startsWith(baseDir)) {
                throw new IOException("Bad entry: " + entry.getName());
            }

            // Returned in the same (possibly relative) form as before the containment fix.
            return normalizePath;
        }
    

    References

    1. https://mkyong.com/java/how-to-create-tar-gz-in-java/
    2. https://commons.apache.org/proper/commons-compress/examples.html
    0 讨论(0)
  • 2020-12-30 06:43

    I ended up doing the following:

    /**
     * Packs the files found recursively under the input directory into
     * {@code /path/to/filename.tar.gz}, using entry names relative to that directory.
     *
     * <p>Each file is streamed into the archive and its input stream is closed right after
     * the entry is written, instead of buffering whole files in memory and leaving the
     * streams open.
     *
     * @return URL of the written archive file
     * @throws IOException if a file cannot be read or the archive cannot be written
     */
    public URL createTarGzip() throws IOException {
        // Placeholder left by the original author: supply the directory to archive here.
        Path inputDirectoryPath = ...
        // FileUtils.listFiles expects a File, not a Path.
        File inputDirectory = inputDirectoryPath.toFile();
        File outputFile = new File("/path/to/filename.tar.gz");

        try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile);
                BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream);
                GzipCompressorOutputStream gzipOutputStream = new GzipCompressorOutputStream(bufferedOutputStream);
                TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(gzipOutputStream)) {

            tarArchiveOutputStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
            tarArchiveOutputStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

            List<File> files = new ArrayList<>(FileUtils.listFiles(
                    inputDirectory,
                    new RegexFileFilter("^(.*?)"),
                    DirectoryFileFilter.DIRECTORY
            ));

            // The base URI is the same for every file; compute it once.
            final java.net.URI baseUri = new File(inputDirectoryPath.toUri()).toURI();

            for (File currentFile : files) {
                String relativeFilePath = baseUri.relativize(
                        new File(currentFile.getAbsolutePath()).toURI()).getPath();

                TarArchiveEntry tarEntry = new TarArchiveEntry(currentFile, relativeFilePath);
                tarEntry.setSize(currentFile.length());

                tarArchiveOutputStream.putArchiveEntry(tarEntry);
                // Stream the content and close the input for every file.
                try (FileInputStream fileInputStream = new FileInputStream(currentFile)) {
                    IOUtils.copy(fileInputStream, tarArchiveOutputStream);
                }
                tarArchiveOutputStream.closeArchiveEntry();
            }
            // No explicit close(): try-with-resources closes the archive stream.
        }

        // Reached only after the archive has been closed without error.
        return outputFile.toURI().toURL();
    }
    

    This takes care of some of the edge cases that come up in the other solutions.

    0 讨论(0)
  • 2020-12-30 06:46

    Something I use (via the Files.walk API); you can chain the calls as gzip(tar(yourFile));

    /**
     * Gzip-compresses a file into a sibling file named {@code <original name>.gz}.
     *
     * @param fileToCompress file whose bytes are compressed
     * @return the newly written {@code .gz} file, located next to the input file
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static File gzip(File fileToCompress) throws IOException {

        // getParentFile() is null for a bare relative name such as "data.tar"; the
        // File(File, String) constructor accepts a null parent, whereas the former
        // toPath().getParent().toFile() chain threw a NullPointerException in that case.
        final File gzipFile = new File(fileToCompress.getParentFile(),
                fileToCompress.getName() + ".gz");

        final byte[] buffer = new byte[1024];

        // The FileOutputStream is its own resource so it is closed even when the
        // GZIPOutputStream constructor fails.
        try (FileInputStream in = new FileInputStream(fileToCompress);
                FileOutputStream fileOut = new FileOutputStream(gzipFile);
                GZIPOutputStream out = new GZIPOutputStream(fileOut)) {

            int len;
            // read() reports end of stream with -1
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
        }

        return gzipFile;
    }
    
    /**
     * Writes the regular files found under {@code folderToCompress} into a new temporary
     * {@code .tar} file.
     *
     * <p>Entry names are the part of each file's path that follows the folder's path.
     * NOTE(review): that remainder begins with a path separator; the tar entry class is
     * presumably relied upon to strip it -- confirm.
     *
     * @param folderToCompress directory whose files are archived
     * @return the temporary tar file that was written
     * @throws IOException if the temporary file cannot be created or walking the folder fails
     * @throws ArchiveException if the tar output stream cannot be created
     * @throws RuntimeException wrapping the {@code IOException} raised while adding a file
     */
    public static File tar(File folderToCompress) throws IOException, ArchiveException {

        final File tarFile = Files.createTempFile(null, ".tar").toFile();

        // The FileOutputStream is its own resource so it is closed even when the factory
        // fails to create the archive stream.
        try (FileOutputStream fileOut = new FileOutputStream(tarFile);
                TarArchiveOutputStream out = (TarArchiveOutputStream) new ArchiveStreamFactory()
                        .createArchiveOutputStream(ArchiveStreamFactory.TAR, fileOut)) {

            out.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

            // Files.walk keeps directory handles open until the stream is closed, so it
            // must be managed by try-with-resources.
            try (java.util.stream.Stream<java.nio.file.Path> paths =
                    Files.walk(folderToCompress.toPath())) {

                paths.forEach(source -> {

                    if (source.toFile().isFile()) {
                        final String relatifSourcePath = StringUtils.substringAfter(
                                source.toString(), folderToCompress.getPath());

                        final TarArchiveEntry entry = new TarArchiveEntry(
                                source.toFile(), relatifSourcePath);

                        try (InputStream in = new FileInputStream(source.toFile())) {
                            out.putArchiveEntry(entry);

                            IOUtils.copy(in, out);

                            out.closeArchiveEntry();
                        }
                        catch (IOException e) {
                            // A lambda cannot throw a checked exception; callers should
                            // handle this better than a bare RuntimeException.
                            throw new RuntimeException(e);
                        }
                    }
                });
            }

        }

        return tarFile;
    }
    
    0 讨论(0)
  • 2020-12-30 06:52

    I followed this solution and it worked until I processed a larger set of files, at which point it crashed seemingly at random after 15,000 to 16,000 files. The following line leaks file handles:

    // Leaky form: the FileInputStream created inline here is never closed.
    IOUtils.copy(new FileInputStream(f), tOut);
    

    and the code crashed with a "Too many open files" error at the OS level. The following minor change fixes the problem:

    // try-with-resources closes the stream even when the copy throws; a trailing
    // in.close() call would be skipped in that case and the handle would still leak.
    try (FileInputStream in = new FileInputStream(f)) {
        IOUtils.copy(in, tOut);
    }
    
    0 讨论(0)
提交回复
热议问题