一、安装部署
此处是单机版安装,我安装的是hadoop3.1.3,因为hadoop2.x和hadoop3.x的安装部署有些不同,所以记录一下
首先安装好jdk,并且准备好hadoop3.x的安装包,可以去官网下载
openssh安装,便于远程上传文件(每台主机都做)
[root@node03 ~]# yum -y install openssh-clients
同步时间工具(每台主机都做)
#安装ntpdate工具
[root@node03 ~]# yum -y install ntp ntpdate
#设置与网络时间同步
[root@node03 ~]# ntpdate cn.pool.ntp.org
#系统时间写入硬件时间
[root@node03 ~]# hwclock --systohc
文件上传(rz)和下载(sz)工具
[root@node03 ~]# yum -y install lrzsz
安装网络下载工具(一台即可)
[root@node03 ~]# yum -y install wget
关闭防火墙(每台都做)
#查看防火墙状态
[root@node03 ~]# systemctl status firewalld
#关闭防火墙
[root@node03 ~]# systemctl stop firewalld
#禁止开机启动防火墙
[root@node03 ~]# systemctl disable firewalld
#开启防火墙
[root@node03 ~]# systemctl start firewalld
#设置开机启动防火墙
[root@node03 ~]# systemctl enable firewalld
#重启防火墙
[root@node03 ~]# systemctl restart firewalld
配置SSH免密登录
#修改hosts文件
[root@node03 ~]# vim /etc/hosts
#添加ip和主机名,根据自己的情况有几台添加几台
192.168.17.128 node03
配置 SSH
#每台都先用ssh 主机名 执行,在主目录产生一个.ssh文件夹
#首次连接会询问是否继续,输入no即可(此步骤的目的只是在主目录生成.ssh文件夹,无需真正登录)
[root@node03 ~]# ssh node03
#进入到.ssh目录进行操作
[root@node03 ~]# cd ~/.ssh
#输入命令,一路回车,产生公钥和私钥
[root@node03 .ssh]# ssh-keygen -t rsa -P ''
[root@node03 .ssh]# cp id_rsa.pub authorized_keys
#如果是多台进行免密可以执行下面括号命令 否则跳过
(
#将所有节点authorized_keys进行合并
[root@node03 .ssh]# cat ~/.ssh/authorized_keys | ssh root@node01 'cat >> ~/.ssh/authorized_keys'
[root@node02 .ssh]# cat ~/.ssh/authorized_keys | ssh root@node01 'cat >> ~/.ssh/authorized_keys'
#分发到其他主机
[root@node01 .ssh]# scp ~/.ssh/authorized_keys root@node02:~/.ssh/
[root@node01 .ssh]# scp ~/.ssh/authorized_keys root@node03:~/.ssh/
)
安装JDK
卸载原JDK
#检查当前安装的jdk版本
[root@node03 ~]# rpm -qa|grep jdk
#如果什么都没有,说明没有安装jdk 无需卸载,否则要卸载
#卸载命令
[root@node03 ~]# yum -y remove 需要卸载的jdk文件名
安装JDK
#创建一个目录来存放jdk
[root@node03 ~]# mkdir -p /opt/module/Java/
进到Java目录下
[root@node03 ~]# cd /opt/module/Java/
#使用rz命令将安装包从windows主机上上传到node03上
[root@node03 Java]# rz
#解压到当前目录
[root@node03 Java]# tar -zxvf jdk-8u212-linux-x64.tar.gz
#配置JDK的环境变量
[root@node03 Java]# vi /etc/profile
#jdk环境变量
export JAVA_HOME=/opt/module/Java/jdk1.8.0_212
export JRE_HOME=/opt/module/Java/jdk1.8.0_212/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib/rt.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
#使刚刚的设置立即生效
[root@node03 Java]# source /etc/profile
#检测是否配置成功
[root@node03 Java]# java -version
java version "1.8.0_212"
Java(TM) SE Runtime Environment (build 1.8.0_212-b10)
Java HotSpot(TM) 64-Bit Server VM (build 25.212-b10, mixed mode)
安装Hadoop
#创建目录
[root@node03 ~]# mkdir -p /opt/module/Hadoop/
#进入到Hadoop目录下
[root@node03 ~]# cd /opt/module/Hadoop/
#用rz上传hadoop的安装包到node03
[root@node03 Hadoop]# rz
#解压到当前目录
[root@node03 Hadoop]# tar -zxvf hadoop-3.1.3.tar.gz
#配置Hadoop环境变量
[root@node03 Hadoop]# vi /etc/profile
#在文件末尾添加以下内容
#hadoop环境变量
export HADOOP_HOME=/opt/module/Hadoop/hadoop-3.1.3
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
#使刚刚的设置立即生效
[root@node03 Hadoop]# source /etc/profile
配置hadoop的配置文件
#进到hadoop配置文件所在的目录
[root@node03 Hadoop]# cd /opt/module/Hadoop/hadoop-3.1.3/etc/hadoop
修改hadoop-env.sh文件
[root@node03 hadoop]# vi hadoop-env.sh
#修改以下内容
export JAVA_HOME=/opt/module/Java/jdk1.8.0_212
export HADOOP_HOME=/opt/module/Hadoop/hadoop-3.1.3
export PATH=$PATH:${HADOOP_HOME}/bin
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
#PID存放目录,若没有此配置则默认存放在tmp临时文件夹中,在启动和关闭HDFS时可能会报错
export HADOOP_PID_DIR=${HADOOP_HOME}/pids
#先注释掉,有问题可以打开,将调试信息打印在console上
#export HADOOP_ROOT_LOGGER=DEBUG,console
修改yarn-env.sh文件
[root@node03 hadoop]# vi yarn-env.sh
#在最后添加以下内容
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
修改core-site.xml文件
[root@node03 hadoop]# vi core-site.xml
#添加以下内容
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/opt/module/Hadoop/hadoop-3.1.3/tmp</value>
</property>
</configuration>
修改hdfs-site.xml 文件
[root@node03 hadoop]# vi hdfs-site.xml
#添加以下内容
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/module/Hadoop/hadoop-3.1.3/dfs/data</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/module/Hadoop/hadoop-3.1.3/dfs/name</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
修改mapred-site.xml文件
[root@node03 hadoop]# vi mapred-site.xml
#添加以下内容
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
<property>
<!-- 注意:mapred.job.tracker 是 Hadoop1.x 的参数,Hadoop3.x 中已不再使用,此property可删除 -->
<name>mapred.job.tracker</name>
<value>node03:9001</value>
</property>
</configuration>
修改yarn-site.xml文件
[root@node03 hadoop]# vi yarn-site.xml
#添加以下内容
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node03</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
修改workers文件(对应hadoop2.x中的slaves)
[root@node03 hadoop]# vi workers
#把默认的localhost删掉,添加数据节点子机的主机名,根据自己的情况添加
node03
修改sbin下start-dfs.sh和stop-dfs.sh文件
[root@node03 hadoop]# cd /opt/module/Hadoop/hadoop-3.1.3/sbin/
[root@node03 sbin]# vi start-dfs.sh
#在文件上面添加如下内容
#!/usr/bin/env bash
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
[root@node03 sbin]# vi stop-dfs.sh
#在文件上面添加如下内容
#!/usr/bin/env bash
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
修改sbin下start-yarn.sh和stop-yarn.sh文件
[root@node03 sbin]# vi start-yarn.sh
#在文件上面添加如下内容
#!/usr/bin/env bash
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
[root@node03 sbin]# vi stop-yarn.sh
#在文件上面添加如下内容
#!/usr/bin/env bash
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
namenode格式化
如果是多台机器组成集群,只需在NameNode所在的主机上执行一次格式化即可,不要在每台机器上都格式化
[root@node03 ~]# hdfs namenode -format
开启服务(如果是多台在主机上开启就可以了)
[root@node03 ~]# start-all.sh
Starting namenodes on [localhost]
上一次登录:二 12月 24 17:10:42 CST 2019pts/7 上
Starting datanodes
上一次登录:二 12月 24 17:10:53 CST 2019pts/7 上
Starting secondary namenodes [node03]
上一次登录:二 12月 24 17:10:55 CST 2019pts/7 上
Starting resourcemanager
上一次登录:二 12月 24 17:10:59 CST 2019pts/7 上
Starting nodemanagers
上一次登录:二 12月 24 17:11:03 CST 2019pts/7 上
#使用jps查看节点
[root@node03 ~]# jps
15459 Jps
14361 NameNode
14683 SecondaryNameNode
14493 DataNode
14957 ResourceManager
15101 NodeManager
官方自带wordcount进行测试mapreduce程序
编写本地文件
#创建一个文件夹存放本地文件
[root@node03 ~]# mkdir -p /opt/module/mydata
#进入到mydata目录
[root@node03 ~]# cd /opt/module/mydata
#创建一个文件并添加内容
[root@node03 mydata]# vi word.txt
#随便添加几个单词,单词之间用空格分割
I am student
ni hao
haha ha
把本地文件上传到HDFS中
#创建hdfs文件目录
[root@node03 mydata]# hadoop fs -mkdir -p /hyk/data/input
#把本地文件上传到分布式文件存储系统中
[root@node03 mydata]# hadoop fs -put word.txt /hyk/data/input
2019-12-24 17:22:52,240 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
测试mapreduce
[root@node03 mydata]# hadoop jar /opt/module/Hadoop/hadoop-3.1.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.3.jar wordcount /hyk/data/input /hyk/data/output
2019-12-24 17:27:48,152 INFO client.RMProxy: Connecting to ResourceManager at node03/192.168.17.128:8032
2019-12-24 17:27:48,645 INFO mapreduce.JobResourceUploader: Disabling Erasure Coding for path: /tmp/hadoop-yarn/staging/root/.staging/job_1577178667536_0001
2019-12-24 17:27:48,785 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2019-12-24 17:27:48,918 INFO input.FileInputFormat: Total input files to process : 1
2019-12-24 17:27:48,967 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2019-12-24 17:27:49,402 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2019-12-24 17:27:49,818 INFO mapreduce.JobSubmitter: number of splits:1
2019-12-24 17:27:49,972 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
2019-12-24 17:27:50,392 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1577178667536_0001
2019-12-24 17:27:50,393 INFO mapreduce.JobSubmitter: Executing with tokens: []
2019-12-24 17:27:50,581 INFO conf.Configuration: resource-types.xml not found
2019-12-24 17:27:50,581 INFO resource.ResourceUtils: Unable to find 'resource-types.xml'.
2019-12-24 17:27:50,968 INFO impl.YarnClientImpl: Submitted application application_1577178667536_0001
2019-12-24 17:27:51,015 INFO mapreduce.Job: The url to track the job: http://node03:8088/proxy/application_1577178667536_0001/
2019-12-24 17:27:51,015 INFO mapreduce.Job: Running job: job_1577178667536_0001
2019-12-24 17:27:59,251 INFO mapreduce.Job: Job job_1577178667536_0001 running in uber mode : false
2019-12-24 17:27:59,260 INFO mapreduce.Job: map 0% reduce 0%
2019-12-24 17:28:04,336 INFO mapreduce.Job: map 100% reduce 0%
2019-12-24 17:28:10,385 INFO mapreduce.Job: map 100% reduce 100%
2019-12-24 17:28:11,399 INFO mapreduce.Job: Job job_1577178667536_0001 completed successfully
2019-12-24 17:28:11,493 INFO mapreduce.Job: Counters: 53
File System Counters
FILE: Number of bytes read=95
FILE: Number of bytes written=435315
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
HDFS: Number of bytes read=152
HDFS: Number of bytes written=57
HDFS: Number of read operations=8
HDFS: Number of large read operations=0
HDFS: Number of write operations=2
Job Counters
Launched map tasks=1
Launched reduce tasks=1
Data-local map tasks=1
Total time spent by all maps in occupied slots (ms)=3208
Total time spent by all reduces in occupied slots (ms)=2989
Total time spent by all map tasks (ms)=3208
Total time spent by all reduce tasks (ms)=2989
Total vcore-milliseconds taken by all map tasks=3208
Total vcore-milliseconds taken by all reduce tasks=2989
Total megabyte-milliseconds taken by all map tasks=3284992
Total megabyte-milliseconds taken by all reduce tasks=3060736
Map-Reduce Framework
Map input records=4
Map output records=8
Map output bytes=73
Map output materialized bytes=95
Input split bytes=110
Combine input records=8
Combine output records=8
Reduce input groups=8
Reduce shuffle bytes=95
Reduce input records=8
Reduce output records=8
Spilled Records=16
Shuffled Maps =1
Failed Shuffles=0
Merged Map outputs=1
GC time elapsed (ms)=146
CPU time spent (ms)=1090
Physical memory (bytes) snapshot=286482432
Virtual memory (bytes) snapshot=5045534720
Total committed heap usage (bytes)=138194944
Peak Map Physical memory (bytes)=186015744
Peak Map Virtual memory (bytes)=2519392256
Peak Reduce Physical memory (bytes)=100466688
Peak Reduce Virtual memory (bytes)=2526142464
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=42
File Output Format Counters
Bytes Written=57
查看统计结果
[root@node03 mydata]# hadoop fs -cat /hyk/data/output/*
2019-12-24 17:32:29,752 INFO sasl.SaslDataTransferClient: SASL encryption trust check: localHostTrusted = false, remoteHostTrusted = false
I 1
am 1
ha 1
haha 1
hao 1
ni 1
student 1
Hadoop3.x部署完成
如有问题可以留言交流共同进步
来源:CSDN
作者:hykDatabases
链接:https://blog.csdn.net/weixin_45102492/article/details/103683257