Distributed environment: the individual Hadoop daemons run across multiple servers.
-
Lab environment
Hostname | Host IP | Role | Operating system
master | 192.168.3.154 | NameNode | CentOS 7.6
slave1 | 192.168.3.155 | DataNode | CentOS 7.6
slave2 | 192.168.3.156 | DataNode | CentOS 7.6
-
Setting up the distributed environment (cluster)
1. First, set up passwordless SSH login
[root@master ~]# vim /etc/hosts
192.168.3.154 master
192.168.3.155 slave1
192.168.3.156 slave2
[root@master ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
The key fingerprint is:
SHA256:AChKhPmoKYhI7IwZAOvVrrn9+MI4hrL8JvESq3NQ7jI root@master
The key's randomart image is:
+---[RSA 2048]----+
|=o .. |
|=o. .. |
|*+ . .. |
|=o+ . . |
|XB . S |
|@=. o |
|E=o*oo. |
|=B*o.o+o |
+----[SHA256]-----+
[root@master ~]# ssh-copy-id 192.168.3.154
[root@master ~]# ssh-copy-id 192.168.3.155
[root@master ~]# ssh-copy-id 192.168.3.156
[root@master ~]#
This configuration (hosts entries, key generation, key distribution) must be carried out on every node; a sketch of scripting the distribution and verification from one machine follows.
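A minimal sketch, assuming root SSH access to all three machines and the /etc/hosts entries shown above (the loop itself is illustrative, not part of the original article):

for h in master slave1 slave2; do
    scp /etc/hosts root@$h:/etc/hosts    # keep the hostname mappings identical on every node
    ssh root@$h hostname                 # must succeed without a password prompt
done

If any host still asks for a password, rerun ssh-copy-id against it before moving on.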
2. Install the JDK on master and configure the environment variables
JDK download: https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
[root@master ~]# rz
[root@master ~]# tar -xzf jdk-8u191-linux-x64_\(1\).tar.gz -C /usr/local/
[root@master ~]# ls /usr/local/jdk1.8.0_191/
bin lib src.zip
COPYRIGHT LICENSE THIRDPARTYLICENSEREADME-JAVAFX.txt
include man THIRDPARTYLICENSEREADME.txt
javafx-src.zip README.html
jre release
[root@master ~]# vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_191
export PATH=${JAVA_HOME}/bin:$PATH
export JRE_HOME=${JAVA_HOME}/jre
[root@master ~]# source /etc/profile
[root@master ~]# java -version
java version "1.8.0_191"
Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
[root@master ~]#
3. Copy the Java installation to the same path on each slave node
[root@master ~]# scp -r /usr/local/jdk1.8.0_191/ root@slave1:/usr/local/
[root@master ~]# scp -r /usr/local/jdk1.8.0_191/ root@slave2:/usr/local/
4. Install Hadoop and configure the environment variables
Hadoop download: https://hadoop.apache.org/releases.html
[root@master ~]# rz
[root@master ~]# tar -xzf hadoop-2.9.2.tar.gz -C /usr/local/
[root@master ~]# ls /usr/local/hadoop-2.9.2/
bin include libexec NOTICE.txt sbin
etc lib LICENSE.txt README.txt share
[root@master ~]# vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.9.2
export PATH=$PATH:${HADOOP_HOME}/bin
[root@master ~]# source /etc/profile
[root@master ~]#
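Note that only ${HADOOP_HOME}/bin is added to the PATH here, which is why step 14 has to invoke the startup scripts by relative path as sbin/start-all.sh. An optional one-line addition to /etc/profile (a tweak not in the original setup) makes the start/stop scripts available everywhere:

export PATH=$PATH:${HADOOP_HOME}/sbin    # in addition to the bin entry above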
5. Copy /etc/profile to the /etc directory on each slave node, overwriting the existing file
[root@master ~]# scp /etc/profile root@slave1:/etc/
profile 100% 1973 1.6MB/s 00:00
[root@master ~]# scp /etc/profile root@slave2:/etc/
profile 100% 1973 1.7MB/s 00:00
[root@master ~]# ssh 192.168.3.155
Last login: Wed May 1 23:13:33 2019 from master
[root@slave1 ~]# source /etc/profile
[root@slave1 ~]# java -version
java version "1.8.0_191"
Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
[root@slave1 ~]# exit
logout
Connection to 192.168.3.155 closed.
[root@master ~]# ssh 192.168.3.156
Last login: Wed May 1 23:16:29 2019 from master
[root@slave2 ~]# source /etc/profile
[root@slave2 ~]# java -version
java version "1.8.0_191"
Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
[root@slave2 ~]# exit
logout
Connection to 192.168.3.156 closed.
[root@master ~]#
6. Edit hadoop-env.sh and add the environment variables (daemons launched over SSH do not read /etc/profile, so JAVA_HOME must be set here as well)
[root@master ~]# cd /usr/local/hadoop-2.9.2/etc/hadoop/
[root@master hadoop]# vim hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_191
export HADOOP_HOME=/usr/local/hadoop-2.9.2
[root@master hadoop]#
7. Edit core-site.xml to specify the node the NameNode runs on
[root@master hadoop]# vim core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
</property>
</configuration>
[root@master hadoop]#
Parameter details:
fs.default.name #hostname and port of the HDFS service (in Hadoop 2.x this is a deprecated alias of fs.defaultFS; both are accepted)
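To confirm the setting is picked up (the hdfs command is on the PATH after step 4), the stock getconf tool can read it back; note that it is queried by the canonical key name:

hdfs getconf -confKey fs.defaultFS    # should print hdfs://master:9000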
8. Edit hdfs-site.xml
[root@master hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>/usr/local/hdfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/usr/local/hdfs/data</value>
</property>
<property>
<name>dfs.secondary.http.address</name>
<value>master:50090</value>
</property>
</configuration>
[root@master hadoop]#
Parameter details:
dfs.replication #number of replicas kept for each HDFS block (3 here, one per node)
dfs.name.dir #path where the NameNode stores its metadata
dfs.data.dir #path where each DataNode stores its block data
dfs.secondary.http.address #HTTP address of the SecondaryNameNode
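Hadoop normally creates these directories itself when the NameNode is formatted and the DataNodes first start, but creating them up front on every node makes permission problems easier to spot. A small illustrative sketch (paths match the configuration above):

for h in master slave1 slave2; do
    ssh root@$h mkdir -p /usr/local/hdfs/name /usr/local/hdfs/data
done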
9. Edit mapred-site.xml to declare that MapReduce runs on YARN
[root@master hadoop]# mv mapred-site.xml.template mapred-site.xml
[root@master hadoop]# vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
[root@master hadoop]#
10. Edit yarn-site.xml to set the hostname and ports for the ResourceManager services and to declare the mapreduce_shuffle auxiliary service
[root@master hadoop]# vim yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8080</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8082</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
[root@master hadoop]#
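After the cluster is started in step 14, the stock yarn CLI can confirm that every NodeManager registered with the ResourceManager:

yarn node -list    # should show three RUNNING nodes: master, slave1 and slave2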
11. Edit the slaves file and list every worker node (master appears here too, so it will also run a DataNode and a NodeManager; that is why jps on master shows a NodeManager in step 14)
[root@master hadoop]# vim slaves
master
slave1
slave2
[root@master hadoop]#
12. Copy the Hadoop installation directory to the same path on every node
[root@master ~]# scp -r /usr/local/hadoop-2.9.2/ root@slave1:/usr/local/
[root@master ~]# scp -r /usr/local/hadoop-2.9.2/ root@slave2:/usr/local/
13. Format HDFS (do this only once; reformatting assigns a new clusterID, after which existing DataNodes will refuse to register)
[root@master ~]# cd /usr/local/hadoop-2.9.2
[root@master hadoop-2.9.2]# bin/hadoop namenode -format
...
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG: host = master/192.168.3.154
STARTUP_MSG: args = [-format]
STARTUP_MSG: version = 2.9.2
************************************************************/
...
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at master/192.168.3.154
************************************************************/
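A quick sanity check after formatting is to inspect the metadata directory configured via dfs.name.dir above; the format writes a VERSION file containing the new clusterID:

cat /usr/local/hdfs/name/current/VERSION    # look for the clusterID and namespaceID lines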
14. Start Hadoop and verify (the "running as process ... Stop it first" lines in the output below simply mean those daemons were already running from an earlier start attempt)
[root@master hadoop-2.9.2]# chmod +x -R sbin/*
[root@master hadoop-2.9.2]# sbin/start-all.sh
Starting secondary namenodes [master]
master: starting secondarynamenode, logging to /usr/local/hadoop-2.9.2/logs/hadoop-root-secondarynamenode-master.out
starting yarn daemons
resourcemanager running as process 9348. Stop it first.
master: nodemanager running as process 10608. Stop it first.
slave1: starting nodemanager, logging to /usr/local/hadoop-2.9.2/logs/yarn-root-nodemanager-slave1.out
slave2: starting nodemanager, logging to /usr/local/hadoop-2.9.2/logs/yarn-root-nodemanager-slave2.out
[root@master hadoop-2.9.2]# jps
10608 NodeManager
9348 ResourceManager
10717 Jps
[root@master hadoop-2.9.2]# ssh 192.168.3.155
Last login: Thu May 2 02:32:06 2019 from master
[root@slave1 ~]# jps
9248 NodeManager
9375 Jps
[root@slave1 ~]# exit
logout
Connection to 192.168.3.155 closed.
[root@master hadoop-2.9.2]# ssh 192.168.3.156
Last login: Thu May 2 02:07:00 2019 from slave1
[root@slave2 ~]# jps
14755 NodeManager
14901 Jps
[root@slave2 ~]#
OK, the Hadoop distributed environment has been set up successfully. An optional smoke test follows.
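As a final check (the example jar path below is the one shipped inside the Hadoop 2.9.2 tarball; the pi arguments are arbitrary small values), run one of the bundled MapReduce jobs and touch HDFS:

hdfs dfs -mkdir -p /tmp/smoke-test    # exercises HDFS writes
hadoop jar /usr/local/hadoop-2.9.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar pi 2 10

You can also browse the NameNode web UI at http://master:50070 and the ResourceManager UI at http://master:8088 (the latter matches yarn.resourcemanager.webapp.address configured in step 10).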
Source: CSDN
Author: a阿飞
Link: https://blog.csdn.net/qq_41490561/article/details/104551126