Setting Up a Hadoop 3.1.0 Cluster on CentOS 7
Environment Preparation
1. Server Overview
hostname | ip | description |
---|---|---|
node1.spark | 192.168.2.140 | node1 (master) |
node2.spark | 192.168.2.141 | node2 |
node3.spark | 192.168.2.142 | node3 |
Run the following commands on all three servers.
# Add host entries
[root@node1 ~]# vim /etc/hosts
192.168.2.140 node1.spark
192.168.2.141 node2.spark
192.168.2.142 node3.spark
# Stop and disable the firewall
[root@node1 ~]# systemctl stop firewalld && systemctl disable firewalld
[root@node1 ~]# setenforce 0
# Set SELINUX to disabled
[root@node1 ~]# vim /etc/selinux/config
SELINUX=disabled
# Reboot the server
[root@node1 ~]# reboot
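After the reboot, a quick sanity check (not part of the original post) can confirm that name resolution, the firewall, and SELinux are in the expected state, assuming the /etc/hosts entries above were added on every node:
# Hostname resolution, firewall state, and SELinux mode
[root@node1 ~]# ping -c 1 node2.spark
[root@node1 ~]# systemctl is-active firewalld    # expected: inactive
[root@node1 ~]# getenforce                       # expected: Disabled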
2. Configure Passwordless SSH Login
# Run the following commands on node1
# Generate an RSA key pair; just press Enter at every prompt. The keys are written to ~/.ssh
[root@node1 ~]# ssh-keygen -t rsa
[root@node1 .ssh]# scp /root/.ssh/id_rsa.pub root@node2.spark:~
[root@node1 .ssh]# scp /root/.ssh/id_rsa.pub root@node3.spark:~
## Run the following commands on node2 and node3
[root@node2 ~]# mkdir -p ~/.ssh
[root@node2 ~]# cd .ssh/
[root@node2 .ssh]# cat ~/id_rsa.pub >> authorized_keys
[root@node2 .ssh]# vim /etc/ssh/sshd_config
# This option controls root login over SSH; since we log in as the root user here, make sure it is enabled
PermitRootLogin yes
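With the public key appended on node2 and node3, logging in from node1 should no longer prompt for a password. A quick check, assuming root SSH login stays enabled as above:
# Should print the remote hostname without asking for a password
[root@node1 ~]# ssh node2.spark hostname
[root@node1 ~]# ssh node3.spark hostname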
3. Install the JDK
# Configure environment variables
[root@node1 ~]# vim /etc/profile
# Append at the end of the file
# Java Environment Path
export JAVA_HOME=/opt/java/jdk1.8.0_172
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
# Reload the profile
[root@node1 ~]# source /etc/profile
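To confirm the variables took effect in the current shell (this assumes the JDK has already been unpacked to /opt/java/jdk1.8.0_172, a step the original post does not show):
[root@node1 ~]# java -version     # should report version 1.8.0_172
[root@node1 ~]# echo $JAVA_HOME   # /opt/java/jdk1.8.0_172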
Installing Hadoop
1. Install Hadoop
[root@node1 opt]# cd /opt && mkdir hadoop && cd hadoop
# Unpack hadoop-3.1.0.tar.gz
[root@node1 hadoop]# tar -zxvf hadoop-3.1.0.tar.gz
# Update the environment variables
[root@node1 hadoop]# vim /etc/profile
# Append at the end of the file
export HADOOP_HOME=/opt/hadoop/hadoop-3.1.0
export PATH=$PATH:$HADOOP_HOME/bin
# Reload the profile
[root@node1 hadoop]# source /etc/profile
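A quick check that the hadoop command is now on the PATH:
[root@node1 hadoop]# hadoop version      # should report Hadoop 3.1.0
[root@node1 hadoop]# echo $HADOOP_HOME   # /opt/hadoop/hadoop-3.1.0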
2. Modify the Configuration Files
All of these configuration files are located under /opt/hadoop/hadoop-3.1.0/etc/hadoop.
hadoop-env.sh
# The java implementation to use. By default, this environment
# variable is REQUIRED on ALL platforms except OS X!
# export JAVA_HOME=
export JAVA_HOME=/opt/java/jdk1.8.0_172
core-site.xml
<configuration>
    <!-- RPC address of the HDFS master (NameNode) -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1.spark:9000</value>
    </property>
    <!-- Base directory for files Hadoop generates at runtime -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/data/tmp</value>
    </property>
</configuration>
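Once the file is saved, the effective value can be read back with hdfs getconf, a handy way to catch XML typos (this check is not part of the original post):
[root@node1 hadoop]# hdfs getconf -confKey fs.defaultFS
hdfs://node1.spark:9000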
hdfs-site.xml
<configuration>
    <!-- HTTP address of the NameNode web UI -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>node1.spark:50070</value>
    </property>
    <!-- HTTP address of the SecondaryNameNode -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node2.spark:50090</value>
    </property>
    <!-- Local directory where the NameNode stores its metadata -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/data/name</value>
    </property>
    <!-- HDFS replication factor -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- Local directory where DataNodes store their blocks -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/data/datanode</value>
    </property>
    <!-- Disable HDFS permission checking -->
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
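With only two DataNodes (node2 and node3), a replication factor of 2 simply means every block is stored on both workers. The value can be verified the same way as above:
[root@node1 hadoop]# hdfs getconf -confKey dfs.replication   # expected: 2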
mapred-site.xml
<configuration>
    <!-- Tell the MapReduce framework to run on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- Classpath that MapReduce applications need at runtime -->
    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /opt/hadoop/hadoop-3.1.0/etc/hadoop,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/common/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/common/lib/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/hdfs/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/hdfs/lib/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/mapreduce/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/mapreduce/lib/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/yarn/*,
            /opt/hadoop/hadoop-3.1.0/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
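The classpath above is spelled out by hand. An alternative (not used in the original post) is to let Hadoop print its own classpath and paste that output into the value:
# Prints the colon-separated classpath of this Hadoop installation
[root@node1 hadoop]# hadoop classpath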
yarn-site.xml
<configuration>
    <!-- Auxiliary shuffle service for MapReduce -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <!-- ResourceManager addresses on the master node -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>node1.spark:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>node1.spark:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>node1.spark:8040</value>
    </property>
</configuration>
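Once the cluster is running (section 3 below), the registered NodeManagers can be listed to confirm they reached the ResourceManager at the addresses configured above; this check is an addition to the original steps:
[root@node1 opt]# yarn node -list   # should list node2.spark and node3.spark as RUNNING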
sbin/start-dfs.sh and sbin/stop-dfs.sh
Add the following at the top of both scripts:
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
sbin/start-yarn.sh and sbin/stop-yarn.sh
Add the following at the top of both scripts:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
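These *_USER variables are what allow the Hadoop 3.x start/stop scripts to be run as root; without them the scripts refuse to launch the corresponding daemons. As a minimal sketch (placement right after the shebang is an assumption; the variables just need to be defined before the daemons are launched), the head of an edited start-dfs.sh would look like:
#!/usr/bin/env bash
# Run the HDFS daemons as root (added for this setup)
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
# ... rest of the original start-dfs.sh ...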
masters
Create a new file named masters; it specifies the host that runs the SecondaryNameNode.
[root@node1 hadoop]# touch /opt/hadoop/hadoop-3.1.0/etc/hadoop/masters
[root@node1 hadoop]# vim /opt/hadoop/hadoop-3.1.0/etc/hadoop/masters
# Add
node2.spark
workers
In Hadoop 3.x the file listing the worker nodes is called workers (it replaces the slaves file of Hadoop 2.x):
[root@node1 hadoop]# touch /opt/hadoop/hadoop-3.1.0/etc/hadoop/workers
[root@node1 hadoop]# vim /opt/hadoop/hadoop-3.1.0/etc/hadoop/workers
# Add
node2.spark
node3.spark
Create the data directories
[root@node1 hadoop]# mkdir -p /opt/hadoop/data/tmp
[root@node1 hadoop]# mkdir -p /opt/hadoop/data/name
[root@node1 hadoop]# mkdir -p /opt/hadoop/data/datanode
Copy to the other hosts
[root@node1 opt]# scp -r /opt/hadoop node2.spark:/opt/
[root@node1 opt]# scp -r /opt/hadoop node3.spark:/opt/
Modify the environment variables on node2.spark and node3.spark
# Update the environment variables
[root@node2 hadoop]# vim /etc/profile
# Append at the end of the file
export HADOOP_HOME=/opt/hadoop/hadoop-3.1.0
export PATH=$PATH:$HADOOP_HOME/bin
# Reload the profile
[root@node2 hadoop]# source /etc/profile
3. Start the Cluster
The first startup requires formatting the NameNode:
[root@node1 opt]# /opt/hadoop/hadoop-3.1.0/bin/hdfs namenode -format
Start the daemons:
[root@node1 opt]# /opt/hadoop/hadoop-3.1.0/sbin/start-all.sh
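After start-all.sh finishes, the running daemons can be checked with jps on each node; the expected distribution below follows from the configuration above, and the web UI addresses are the one set in hdfs-site.xml plus YARN's default port 8088 (this verification step is not in the original post):
[root@node1 opt]# jps   # expect NameNode, ResourceManager
[root@node2 ~]# jps     # expect DataNode, SecondaryNameNode, NodeManager
[root@node3 ~]# jps     # expect DataNode, NodeManager
# Web UIs: HDFS  http://node1.spark:50070   YARN  http://node1.spark:8088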
Source: oschina
Link: https://my.oschina.net/u/1028790/blog/1816023