Development environment: Eclipse + Maven + JDK 1.8
Code:
package com.zhiwei.hdfs;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
/**
* Known issue: "Permission Denied" errors.
* Workaround: open up permissions on the target HDFS directory: hdfs dfs -chmod -R 777 <HDFS directory path>
* Hadoop version: hadoop-2.7.3
*/
public class HdfsClient {
private static String prefix = "hdfs://";
private static String targetHost = "localhost";
private static String targetPort = "9090";
private static Configuration conf = new Configuration();
private static FileSystem fileSystem = null;
private HdfsClient(){}
/**
* Initialize the HDFS client; this overload connects as user "root"
* @param host NameNode host
* @param port NameNode port
*/
public static void initClient(String host,String port) {
initClient(host,port,"root");
}
public static void initClient(String host,String port, String user) {
targetHost = host;
targetPort = port;
try {
//Connect to HDFS as the given user
fileSystem = FileSystem.get(URI.create(prefix + targetHost + ":" + targetPort), conf, user);
} catch (IOException | InterruptedException e) {
e.printStackTrace();
}
}
/**
* Get DataNode information for the HDFS cluster
* Xceivers: the number of threads a DataNode is currently using to transfer data
* @return DataNode details, or null on failure
*/
public static DatanodeInfo[] getDatanodeInfos(){
try {
//DataNode stats are only exposed by a DistributedFileSystem, i.e. a real HDFS connection
DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
return dfs.getDataNodeStats();
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
/**
* Check whether a file exists in HDFS
* @param hdfsFile HDFS file path
* @return true if the file exists
*/
public static boolean isFileExist(String hdfsFile){
boolean isSuccess = false;
try {
isSuccess = fileSystem.exists(new Path(hdfsFile));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
/**
* List all files under an HDFS directory
* @param hdfsFileDir HDFS directory path
* @return the files' statuses, or null on failure
*/
public static FileStatus[] getFilesByDir(String hdfsFileDir){
FileStatus[] fileStatus = null;
try {
//Reuse the FileSystem created by initClient() instead of reconnecting with hard-coded settings
fileStatus = fileSystem.listStatus(new Path(hdfsFileDir));
} catch (IOException e) {
e.printStackTrace();
return null;
}
return fileStatus;
}
/**
* Create an HDFS directory (parent directories are created as needed)
* @param hdfsFileDir HDFS directory path
* @return true on success
*/
public static boolean makeHdfsDir(String hdfsFileDir){
boolean isSuccess = false;
try {
isSuccess = fileSystem.mkdirs(new Path(hdfsFileDir));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
public static boolean deleteHdfsFile(String hdfsFilePath) {
return deleteHdfsFile(hdfsFilePath,true);
}
/**
* Delete an HDFS file or directory
* @param hdfsFilePath HDFS file path
* @param isRecursive whether to delete recursively
* @return true on success
*/
public static boolean deleteHdfsFile(String hdfsFilePath, boolean isRecursive){
boolean isSuccess = false;
try {
isSuccess = fileSystem.delete(new Path(hdfsFilePath),isRecursive);
} catch (IOException e) {
e.printStackTrace();
return false;
}
return isSuccess;
}
/**
* Read the contents of an HDFS file
* @param hdfsFilePath HDFS file path
* @return the file contents as a byte array
* @throws IOException if the read fails
*/
public static byte[] readHdfsFile(String hdfsFilePath) throws IOException{
FSDataInputStream fis = null;
byte[] data = null;
try {
Path path = new Path(hdfsFilePath);
//available() is unreliable on HDFS streams; size the buffer from the file length and read fully
data = new byte[(int) fileSystem.getFileStatus(path).getLen()];
fis = fileSystem.open(path);
IOUtils.readFully(fis, data, 0, data.length);
} finally {
IOUtils.closeStream(fis);
}
return data;
}
/**
* Rename an HDFS file
* @param oldName source file name (full path)
* @param newName target file name (full path)
* @return true on success
*/
public static boolean renameHdfsFile(String oldName,String newName){
try {
//rename() reports success or failure itself, so propagate its result
return fileSystem.rename(new Path(oldName), new Path(newName));
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* Write a byte array to a new HDFS file
* @param dest path of the new HDFS file
* @param content the bytes to write
* @return true on success
*/
public static boolean writeInfoToHdfsFile(String dest,byte[] content){
FSDataOutputStream fsDataOutputStream = null;
try {
fsDataOutputStream = fileSystem.create(new Path(dest));
fsDataOutputStream.write(content);
fsDataOutputStream.flush();
} catch (IOException e) {
e.printStackTrace();
return false;
}finally {
IOUtils.closeStream(fsDataOutputStream);
}
return true;
}
/**
* Upload a local file to HDFS with default options (keep the source, do not overwrite)
* @param src local source path
* @param dest HDFS destination path
* @return true on success
*/
public static boolean uploadLocalFileToHDFS(String src,String dest){
return uploadLocalFileToHDFS(false, false, src, dest);
}
/**
* Upload a local file to the Hadoop HDFS file system
* @param delSrc whether to delete the source file (default: no)
* @param override whether to overwrite an existing file of the same name (default: no)
* @param src full local file path
* @param dest full HDFS destination path
* @return true on success
*/
public static boolean uploadLocalFileToHDFS(boolean delSrc,boolean override,String src,String dest){
try {
//注意:目标地址可以写全路径,如果不写则默认在当前访问的用户主目录下操作
fileSystem.copyFromLocalFile(delSrc,override,new Path(src), new Path(dest));
} catch (IOException e) {
e.printStackTrace();
return false;
}
return true;
}
/**
* Close the HDFS client
*/
public static void close() {
if(fileSystem != null ) {
try {
fileSystem.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
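For reference, a minimal end-to-end sketch of how the class above is meant to be used. The host and port are the example values from this article; the /data/demo paths and the class name are hypothetical:
import java.io.IOException;

public class HdfsClientDemo {
    public static void main(String[] args) throws IOException {
        // Initialize once, run a few operations, then release the connection
        HdfsClient.initClient("192.168.204.129", "9090", "root");
        if (!HdfsClient.isFileExist("/data/demo")) {
            HdfsClient.makeHdfsDir("/data/demo");
        }
        HdfsClient.writeInfoToHdfsFile("/data/demo/hello.txt", "hello hdfs".getBytes());
        System.out.println(new String(HdfsClient.readHdfsFile("/data/demo/hello.txt")));
        HdfsClient.close();
    }
}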
Test code:
package com.zhiwei.hdfs;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class HdfsClientTest {
@Before
public void init() {
//On Windows the Hadoop client resolves winutils.exe via hadoop.home.dir (or HADOOP_HOME), so point it at a local Hadoop distribution
System.setProperty("hadoop.home.dir", "D:\\Tools\\hadoop-2.7.3");
}
/**
* Get HDFS DataNode information
* @throws Exception
*/
@Test
public void getDatanodeInfosTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
DatanodeInfo[] datanodeInfos = HdfsClient.getDatanodeInfos();
for(DatanodeInfo datanodeInfo : datanodeInfos) {
System.out.println("节点主机名:" + datanodeInfo.getHostName());
System.out.println("节点Http访问端口:" + datanodeInfo.getInfoPort());
System.out.println("节点IPC访问端口:" + datanodeInfo.getIpcPort());
System.out.println("节点已用缓存:" + datanodeInfo.getCacheUsedPercent());
}
}
/**
* Check whether a file exists
* @throws Exception
*/
@Test
public void isFileExistTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println(HdfsClient.isFileExist("/data"));
}
/**
* List the files under a directory
* @throws Exception
*/
@Test
public void getFilesByDirTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
FileStatus[] fStatus = HdfsClient.getFilesByDir("/data");
for(FileStatus fs : fStatus) {
System.out.println("子文件路径:" + fs.getPath()
+ ", " + "子文件属组:" + fs.getGroup()
+ ", 文件属主: " + fs.getOwner());
}
}
/**
* Create an HDFS directory
* @throws Exception
*/
@Test
public void makeHdfsDirTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件创建成功: " + HdfsClient.makeHdfsDir("/data/test"));
}
/**
* Delete an HDFS directory
* @throws Exception
*/
@Test
public void deleteHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件删除成功: " + HdfsClient.deleteHdfsFile("/data/test",true));
}
/**
* Read an HDFS file
* @throws Exception
*/
@Test
public void readHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("HDFS文件内容: " + Bytes.toString(HdfsClient.readHdfsFile("/data/mapreduce/output/part-r-00000")));
}
/**
* Rename an HDFS file
* @throws Exception
*/
@Test
public void renameHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件重命名成功: " + HdfsClient.renameHdfsFile("/data/mapreduce/output/test","/data/mapreduce/output/test1"));
}
/**
* Write data to HDFS
* @throws Exception
*/
@Test
public void writeInfoToHdfsFileTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("数据写入HDFS: " + HdfsClient.writeInfoToHdfsFile("/data/Test","/data/mapreduce/output/test1".getBytes()));
}
/**
* Upload a file to HDFS
* @throws Exception
*/
@Test
public void uploadLocalFileToHDFSTest() throws Exception {
HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
System.out.println("文件上传HDFS: " + HdfsClient.uploadLocalFileToHDFS(true,true,"d://temp/test.txt","/data/Test"));
}
@After
public void close() {
HdfsClient.close();
}
}
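A side note on the user name: besides passing it to FileSystem.get as the client above does, Hadoop's SIMPLE (non-Kerberos) authentication also picks up the HADOOP_USER_NAME environment variable or system property when resolving the client user. A minimal sketch, assuming SIMPLE authentication and reusing the example address and the "squirrel" user from the tests (class name is illustrative):
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopUserNameDemo {
    public static void main(String[] args) throws Exception {
        // Must be set before the first FileSystem.get() call,
        // because the login user is cached after that
        System.setProperty("HADOOP_USER_NAME", "squirrel");
        try (FileSystem fs = FileSystem.get(
                URI.create("hdfs://192.168.204.129:9090"), new Configuration())) {
            System.out.println(fs.exists(new Path("/data")));
        }
    }
}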
Maven configuration:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.zhiwei</groupId>
<artifactId>hadoop</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Hadoop</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<hadoop.version>2.7.3</hadoop.version>
<hbase.version>1.2.6</hbase.version>
<hive.version>2.3.1</hive.version>
<zookeeper.version>3.4.8</zookeeper.version>
<curator.version>4.0.0</curator.version>
<fastjson.version>1.2.41</fastjson.version>
<mahout.version>0.13.0</mahout.version>
<kafka.version>0.11.0.2</kafka.version>
<zkclient.version>0.10</zkclient.version>
<junit.version>4.12</junit.version>
</properties>
<dependencies>
<!-- 配置Zookeeper -->
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
</dependency>
<!-- Netflix Zookeeper组件 -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${curator.version}</version>
</dependency>
<!-- Netflix Zookeeper组件 -->
<dependency>
<groupId>com.101tec</groupId>
<artifactId>zkclient</artifactId>
<version>${zkclient.version}</version>
</dependency>
<!-- Hadoop -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Hbase -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- hive -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<!-- Kafka -->
<!-- <dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version></version>
</dependency> -->
<!-- mahout -->
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-math</artifactId>
<version>${mahout.version}</version>
</dependency>
<dependency>
<groupId>org.apache.mahout</groupId>
<artifactId>mahout-hdfs</artifactId>
<version>${mahout.version}</version>
</dependency>
<!-- Alibaba FastJson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- 配置JUNIT -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
</dependency>
<!-- 覆盖默认Guava(hive)版本,防止出现Guava版本冲突问题 -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
</dependencies>
<!-- 指定maven项目的JDK版本 -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Note: running against Hadoop does not actually depend on the Hadoop Eclipse plugin. The plugin merely wraps Hadoop's configuration parameters; under the hood it goes through the same Hadoop API, presenting the HDFS file system as a tree.
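As a minimal sketch of what the plugin configures for you: the same connection can be set up with a plain Configuration. The address is the example one used throughout this article, fs.defaultFS is the standard key for the default file system URI, and the class name is illustrative:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class PlainApiConnectionDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // fs.defaultFS is the same setting the plugin manages through its UI
        conf.set("fs.defaultFS", "hdfs://192.168.204.129:9090");
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println("Connected to: " + fs.getUri());
        }
    }
}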
Source: oschina
Link: https://my.oschina.net/u/4074151/blog/3014232