We are storing zip files, each containing XML files, in HDFS. We need to be able to programmatically unzip a file and stream out the XML files it contains, using Java and the Hadoop FileSystem API.
Hi, please find the sample code below:
/**
 * Reads a zip archive from HDFS and loads every file entry fully into memory.
 *
 * <p>Directory entries are skipped. The returned map preserves the order in which entries
 * appear in the archive; if two entries share a name, the later one replaces the earlier one.
 * Every entry is buffered in memory, so this is only suitable for archives whose uncompressed
 * content fits comfortably in the heap.
 *
 * @param hdfsFilePath location of the zip file, passed to {@code new Path(String)}
 * @return map from entry name to the entry's uncompressed bytes; never {@code null}
 * @throws IllegalStateException if opening or reading the archive fails with a checked
 *     exception (the original exception is kept as the cause); unchecked exceptions raised
 *     while opening the archive propagate unchanged
 */
public static Map<String, byte[]> loadZipFileData(String hdfsFilePath) {
    // NOTE(review): this call needs a static, one-argument readZipFileFromHDFS(Path) overload.
    // The only variant visible next to this method is a non-static (FileSystem, Path) one, so
    // confirm the overload exists or obtain a FileSystem here and pass it through.
    // try-with-resources guarantees the HDFS stream is closed even when reading fails part-way.
    try (ZipInputStream zipInputStream = readZipFileFromHDFS(new Path(hdfsFilePath))) {
        Map<String, byte[]> listOfFiles = new LinkedHashMap<>();
        byte[] buf = new byte[1024];
        ZipEntry zipEntry;
        while ((zipEntry = zipInputStream.getNextEntry()) != null) {
            if (!zipEntry.isDirectory()) {
                // ByteArrayOutputStream holds no external resource, so it needs no close().
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
                int bytesRead;
                // read() returns -1 at the end of the current entry, not the end of the archive.
                while ((bytesRead = zipInputStream.read(buf, 0, buf.length)) > -1) {
                    outputStream.write(buf, 0, bytesRead);
                }
                listOfFiles.put(zipEntry.getName(), outputStream.toByteArray());
            }
            zipInputStream.closeEntry();
        }
        return listOfFiles;
    } catch (RuntimeException e) {
        // Let unchecked failures through untouched.
        throw e;
    } catch (Exception e) {
        // Surface the failure instead of printing it and returning nothing.
        throw new IllegalStateException("Failed to read zip file from HDFS: " + hdfsFilePath, e);
    }
}
/**
 * Opens the file at {@code path} on the given file system and wraps it in a
 * {@link ZipInputStream}.
 *
 * <p>The stream is returned open; the caller is responsible for closing it.
 *
 * @param fileSystem file system used to check for and open the file
 * @param path location of the zip file
 * @return a zip stream positioned before the first entry
 * @throws IllegalArgumentException if {@code fileSystem} reports that {@code path} does not exist
 * @throws Exception if the existence check or the open call fails
 */
protected ZipInputStream readZipFileFromHDFS(FileSystem fileSystem, Path path) throws Exception {
    boolean present = fileSystem.exists(path);
    if (present) {
        return new ZipInputStream(fileSystem.open(path));
    }
    throw new IllegalArgumentException(path.getName() + " does not exist");
}