Development environment: Eclipse + Maven + JDK 1.8
Code:
package com.zhiwei.hdfs;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
/**
* Known issue: "Permission Denied" errors.
* Workaround: open up permissions on the target HDFS directory: hdfs dfs -chmod -R 777 <HDFS directory path>
* Hadoop version: hadoop-2.7.3
*/
public class HdfsClient {
private static String prefix = "hdfs://";
private static String targetHost = "localhost";
private static String targetPort = "9090";
private static Configuration conf = new Configuration();
private static FileSystem fileSystem = null;
private HdfsClient(){}
/**
* Initialize the HDFS client; this overload connects as user "root"
* @param host NameNode host
* @param port NameNode port
*/
public static void initClient(String host,String port) {
initClient(host,port,"root");
}
public static void initClient(String host,String port, String user) {
targetHost = host;
targetPort = port;
try {
//Connect to HDFS as the given user
fileSystem = FileSystem.get(URI.create(prefix + targetHost + ":" + targetPort), conf, user);
} catch (IOException | InterruptedException e) {
e.printStackTrace();
}
}
/**
* Get DataNode information for the HDFS cluster
* Xceivers: the number of threads a DataNode is currently using to transfer data
* @return DataNode details, or null on failure
*/
public static DatanodeInfo[] getDatanodeInfos(){
try {
//DataNode stats are only exposed by a DistributedFileSystem, i.e. a real HDFS connection
DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
return dfs.getDataNodeStats();
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
/**
* Check whether a file exists in HDFS
* @param hdfsFile HDFS file path
* @return true if the file exists
*/
public static boolean isFileExist(String hdfsFile){
boolean isSuccess = false;
try {
isSuccess = fileSystem.exists(new Path(hdfsFile));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
/**
* List all files under an HDFS directory
* @param hdfsFileDir HDFS directory path
* @return the files' statuses, or null on failure
*/
public static FileStatus[] getFilesByDir(String hdfsFileDir){
FileStatus[] fileStatus = null;
try {
//Reuse the FileSystem created by initClient() instead of reconnecting with hard-coded settings
fileStatus = fileSystem.listStatus(new Path(hdfsFileDir));
} catch (IOException e) {
e.printStackTrace();
return null;
}
return fileStatus;
}
/**
* Create an HDFS directory (parent directories are created as needed)
* @param hdfsFileDir HDFS directory path
* @return true on success
*/
public static boolean makeHdfsDir(String hdfsFileDir){
boolean isSuccess = false;
try {
isSuccess = fileSystem.mkdirs(new Path(hdfsFileDir));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
public static boolean deleteHdfsFile(String hdfsFilePath) {
return deleteHdfsFile(hdfsFilePath,true);
}
/**
* Delete an HDFS file or directory
* @param hdfsFilePath HDFS file path
* @param isRecursive whether to delete recursively
* @return true on success
*/
public static boolean deleteHdfsFile(String hdfsFilePath, boolean isRecursive){
boolean isSuccess = false;
try {
isSuccess = fileSystem.delete(new Path(hdfsFilePath),isRecursive);
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
/**
* Read the contents of an HDFS file
* @param hdfsFilePath HDFS file path
* @return the file contents as a byte array
* @throws IOException if the read fails
*/
public static byte[] readHdfsFile(String hdfsFilePath) throws IOException{
FSDataInputStream fis = null;
byte[] data = null;
try {
Path path = new Path(hdfsFilePath);
//available() is unreliable on HDFS streams; size the buffer from the file length and read fully
data = new byte[(int) fileSystem.getFileStatus(path).getLen()];
fis = fileSystem.open(path);
IOUtils.readFully(fis, data, 0, data.length);
} finally {
IOUtils.closeStream(fis);
}
return data;
}
/**
* Rename an HDFS file
* @param oldName source file name (full path)
* @param newName target file name (full path)
* @return true on success
*/
public static boolean renameHdfsFile(String oldName,String newName){
try {
//rename() reports success or failure itself, so propagate its result
return fileSystem.rename(new Path(oldName), new Path(newName));
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* Write a byte array to a new HDFS file
* @param dest path of the new HDFS file
* @param content the bytes to write
* @return true on success
*/
public static boolean writeInfoToHdfsFile(String dest,byte[] content){
FSDataOutputStream fsDataOutputStream = null;
try {
fsDataOutputStream = fileSystem.create(new Path(dest));
fsDataOutputStream.write(content);
fsDataOutputStream.flush();
} catch (IOException e) {
e.printStackTrace();
return false;
}finally {
IOUtils.closeStream(fsDataOutputStream);
}
return true;
}
/**
* Upload a local file to HDFS with default options (keep the source, do not overwrite)
* @param src local source path
* @param dest HDFS destination path
* @return true on success
*/
public static boolean uploadLocalFileToHDFS(String src,String dest){
return uploadLocalFileToHDFS(false, false, src, dest);
}
/**
* Upload a local file to the Hadoop HDFS file system
* @param delSrc whether to delete the source file (default: no)
* @param override whether to overwrite an existing file of the same name (default: no)
* @param src full local file path
* @param dest full HDFS destination path
* @return true on success
*/
public static boolean uploadLocalFileToHDFS(boolean delSrc,boolean override,String src,String dest){
try {
//注意:目标地址可以写全路径,如果不写则默认在当前访问的用户主目录下操作
fileSystem.copyFromLocalFile(delSrc,override,new Path(src), new Path(dest));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return true;
}
/**
* Close the HDFS client
*/
public static void close() {
if(fileSystem != null ) {
try {
fileSystem.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
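For reference, a minimal end-to-end sketch of how the class above is meant to be used. The host and port are the example values from this article; the /data/demo paths and the class name are hypothetical:
import java.io.IOException;

public class HdfsClientDemo {
    public static void main(String[] args) throws IOException {
        // Initialize once, run a few operations, then release the connection
        HdfsClient.initClient("192.168.204.129", "9090", "root");
        if (!HdfsClient.isFileExist("/data/demo")) {
            HdfsClient.makeHdfsDir("/data/demo");
        }
        HdfsClient.writeInfoToHdfsFile("/data/demo/hello.txt", "hello hdfs".getBytes());
        System.out.println(new String(HdfsClient.readHdfsFile("/data/demo/hello.txt")));
        HdfsClient.close();
    }
}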
Test code:
package com.zhiwei.hdfs;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class HdfsClientTest {
@Before
public void init() {
//On Windows the Hadoop client resolves winutils.exe via hadoop.home.dir (or HADOOP_HOME), so point it at a local Hadoop distribution
System.setProperty("hadoop.home.dir", "D:\\Tools\\hadoop-2.7.3");
}
/**
* Get HDFS DataNode information
* @throws Exception
*/
@Test
public void getDatanodeInfosTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
DatanodeInfo[] datanodeInfos = HdfsClient.getDatanodeInfos();
for(DatanodeInfo datanodeInfo : datanodeInfos) {
System.out.println("节点主机名:" + datanodeInfo.getHostName());
System.out.println("节点Http访问端口:" + datanodeInfo.getInfoPort());
System.out.println("节点IPC访问端口:" + datanodeInfo.getIpcPort());
System.out.println("节点已用缓存:" + datanodeInfo.getCacheUsedPercent());
}
}
/**
* Check whether a file exists
* @throws Exception
*/
@Test
public void isFileExistTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println(HdfsClient.isFileExist("/data"));
}
/**
* List the files under a directory
* @throws Exception
*/
@Test
public void getFilesByDirTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
FileStatus[] fStatus = HdfsClient.getFilesByDir("/data");
for(FileStatus fs : fStatus) {
System.out.println("子文件路径:" + fs.getPath()
+ ", " + "子文件属组:" + fs.getGroup()
+ ", 文件属主: " + fs.getOwner());
}
}
/**
* Create an HDFS directory
* @throws Exception
*/
@Test
public void makeHdfsDirTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件创建成功: " + HdfsClient.makeHdfsDir("/data/test"));
}
/**
* Delete an HDFS directory
* @throws Exception
*/
@Test
public void deleteHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件删除成功: " + HdfsClient.deleteHdfsFile("/data/test",true));
}
/**
* Read an HDFS file
* @throws Exception
*/
@Test
public void readHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("HDFS文件内容: " + Bytes.toString(HdfsClient.readHdfsFile("/data/mapreduce/output/part-r-00000")));
}
/**
* Rename an HDFS file
* @throws Exception
*/
@Test
public void renameHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件重命名成功: " + HdfsClient.renameHdfsFile("/data/mapreduce/output/test","/data/mapreduce/output/test1"));
}
/**
* Write data to HDFS
* @throws Exception
*/
@Test
public void writeInfoToHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("数据写入HDFS: " + HdfsClient.writeInfoToHdfsFile("/data/Test","/data/mapreduce/output/test1".getBytes()));
}
/**
* Upload a file to HDFS
* @throws Exception
*/
@Test
public void uploadLocalFileToHDFSTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件上传HDFS: " + HdfsClient.uploadLocalFileToHDFS(true,true,"d://temp/test.txt","/data/Test"));
}
@After
public void close() {
HdfsClient.close();
}
}
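A side note on the user name: besides passing it to FileSystem.get as the client above does, Hadoop's SIMPLE (non-Kerberos) authentication also picks up the HADOOP_USER_NAME environment variable or system property when resolving the client user. A minimal sketch, assuming SIMPLE authentication and reusing the example address and the "squirrel" user from the tests (class name is illustrative):
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopUserNameDemo {
    public static void main(String[] args) throws Exception {
        // Must be set before the first FileSystem.get() call,
        // because the login user is cached after that
        System.setProperty("HADOOP_USER_NAME", "squirrel");
        try (FileSystem fs = FileSystem.get(
                URI.create("hdfs://192.168.204.129:9090"), new Configuration())) {
            System.out.println(fs.exists(new Path("/data")));
        }
    }
}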
Maven configuration:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>hadoop</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Hadoop</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<hadoop.version>2.7.3</hadoop.version>
<hbase.version>1.2.6</hbase.version>
<hive.version>2.3.1</hive.version>
<zookeeper.version>3.4.8</zookeeper.version>
<curator.version>4.0.0</curator.version>
<fastjson.version>1.2.41</fastjson.version>
<mahout.version>0.13.0</mahout.version>
<kafka.version>0.11.0.2</kafka.version>
<zkclient.version>0.10</zkclient.version>
<junit.version>4.12</junit.version>
</properties>
<dependencies>
<!-- 配置Zookeeper -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
</dependency>
<!-- Netflix Zookeeper组件 -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${curator.version}</version>
</dependency>
<!-- Netflix Zookeeper组件 -->
<dependency>
<groupId>com.101tec</groupId>
<artifactId>zkclient</artifactId>
<version>${zkclient.version}</version>
</dependency>
<!-- Hadoop -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Hbase -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- hive -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<!-- Kafka -->
<!-- <dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version></version>
</dependency> -->
<!-- mahout -->
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-math</artifactId>
<version>${mahout.version}</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-hdfs</artifactId>
<version>${mahout.version}</version>
</dependency>
<!-- Alibaba FastJson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- 配置JUNIT -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
</dependency>
<!-- 覆盖默认Guava(hive)版本,防止出现Guava版本冲突问题 -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
</dependencies>
<!-- 指定maven项目的JDK版本 -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Note: running against Hadoop does not actually depend on the Hadoop Eclipse plugin. The plugin merely wraps Hadoop's configuration parameters; under the hood it goes through the same Hadoop API, presenting the HDFS file system as a tree.
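As a minimal sketch of what the plugin configures for you: the same connection can be set up with a plain Configuration. The address is the example one used throughout this article, fs.defaultFS is the standard key for the default file system URI, and the class name is illustrative:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class PlainApiConnectionDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // fs.defaultFS is the same setting the plugin manages through its UI
        conf.set("fs.defaultFS", "hdfs://192.168.204.129:9090");
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println("Connected to: " + fs.getUri());
        }
    }
}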
Source: oschina
Link: https://my.oschina.net/u/4074151/blog/3014232