During development you often need an HDFS environment with Kerberos authentication so you can simulate production. A single-node HDFS is easy, but adding Kerberos brings two problems: the configuration is involved, and Java clients easily get the authentication wrong. This post summarizes how to quickly stand up a Kerberos-authenticated HDFS for development and testing.
CentOS 6.10 minimal install
Install Kerberos first
yum -y install krb5-libs krb5-server krb5-workstation
echo '192.168.127.131 myli' >> /etc/hosts # this host's name: map it to the real IP, not 127.0.0.1
echo '192.168.127.131 kerberos.example.com' >> /etc/hosts
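The kerberos.example.com alias matters because the stock CentOS 6 /etc/krb5.conf already points the EXAMPLE.COM realm at that host name. If your krb5.conf differs, the relevant entries should look roughly like this (realm and host as assumed throughout this guide):
# /etc/krb5.conf (relevant parts)
[libdefaults]
 default_realm = EXAMPLE.COM
[realms]
 EXAMPLE.COM = {
  kdc = kerberos.example.com
  admin_server = kerberos.example.com
 }
[domain_realm]
 .example.com = EXAMPLE.COM
 example.com = EXAMPLE.COM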
kdb5_util create -r EXAMPLE.COM -s # in another terminal run: cat /dev/sda > /dev/urandom  (feeds the entropy pool to speed this up); you will be prompted for a new database master password
kadmin.local -q "addprinc admin/admin" # admin principal; set a new password
/etc/init.d/krb5kdc start
/etc/init.d/kadmin start
kadmin.local -q 'addprinc -randkey hdfs/myli@EXAMPLE.COM' # create the service principals
kadmin.local -q 'addprinc -randkey HTTP/myli@EXAMPLE.COM'
kadmin.local -q 'xst -k hdfs.keytab hdfs/myli@EXAMPLE.COM' # export keytab files
kadmin.local -q 'xst -k HTTP.keytab HTTP/myli@EXAMPLE.COM'
klist -kt hdfs.keytab # list the principals in the keytab
kinit -kt hdfs.keytab hdfs/myli@EXAMPLE.COM # obtain a ticket as this principal
klist # list the current tickets
kdestroy # destroy the tickets (log out)
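For reference, between the kinit and the kdestroy above, klist should report the cached ticket roughly as follows (the cache file name varies with the uid):
# Ticket cache: FILE:/tmp/krb5cc_0
# Default principal: hdfs/myli@EXAMPLE.COM
# followed by a krbtgt/EXAMPLE.COM@EXAMPLE.COM entry with its validity times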
Install and configure Hadoop with Kerberos authentication
useradd hdfs # the secure DataNode is launched via jsvc and must run as a regular (non-root) user
cp hdfs.keytab /home/hdfs/
cp HTTP.keytab /home/hdfs/
chown hdfs:hdfs /home/hdfs/*.keytab
yum -y install java-1.8.0-openjdk-devel java
yum -y groupinstall 'Development Tools' # needed to compile jsvc
su - hdfs
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz
wget https://archive.apache.org/dist/commons/daemon/binaries/commons-daemon-1.0.15-bin.tar.gz
wget https://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz
tar xf hadoop-2.7.1.tar.gz
tar xf commons-daemon-1.0.15-bin.tar.gz
tar xf commons-daemon-1.0.15-src.tar.gz
cd commons-daemon-1.0.15-src/src/native/unix/
./configure --with-java=/usr/lib/jvm/java-openjdk
make
cp jsvc ~/hadoop-2.7.1/libexec/
cd
rm ~/hadoop-2.7.1/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar
cp commons-daemon-1.0.15/commons-daemon-1.0.15.jar ~/hadoop-2.7.1/share/hadoop/hdfs/lib/
cd hadoop-2.7.1
# etc/hadoop/hadoop-env.sh
sed -i 's/JAVA_HOME=.*/JAVA_HOME=\/usr\/lib\/jvm\/java-openjdk/g' etc/hadoop/hadoop-env.sh
sed -i 's/#.*JSVC_HOME=.*/export JSVC_HOME=\/home\/hdfs\/hadoop-2.7.1\/libexec/g' etc/hadoop/hadoop-env.sh
sed -i 's/HADOOP_SECURE_DN_USER=.*/HADOOP_SECURE_DN_USER=hdfs/g' etc/hadoop/hadoop-env.sh
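To double-check the sed edits (or simply edit hadoop-env.sh by hand instead), the file should now contain roughly these three lines:
grep -E 'JAVA_HOME=|JSVC_HOME=|HADOOP_SECURE_DN_USER=' etc/hadoop/hadoop-env.sh
# export JAVA_HOME=/usr/lib/jvm/java-openjdk
# export JSVC_HOME=/home/hdfs/hadoop-2.7.1/libexec
# export HADOOP_SECURE_DN_USER=hdfs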
# core-site.xml
sed -i '19,$d' etc/hadoop/core-site.xml
echo '<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.127.131:9000</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>kerberos</value>
</property>
<property>
<name>hadoop.security.authorization</name>
<value>true</value>
</property>
</configuration>' >> etc/hadoop/core-site.xml
# hdfs-site.xml
sed -i '19,$d' etc/hadoop/hdfs-site.xml
echo '<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>700</value>
</property>
<property>
<name>dfs.namenode.keytab.file</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>dfs.namenode.kerberos.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.namenode.kerberos.https.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:1004</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:1006</value>
</property>
<property>
<name>dfs.datanode.keytab.file</name>
<value>/home/hdfs/hdfs.keytab</value>
</property>
<property>
<name>dfs.datanode.kerberos.principal</name>
<value>hdfs/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.datanode.kerberos.https.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.principal</name>
<value>HTTP/myli@EXAMPLE.COM</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.keytab</name>
<value>/home/hdfs/HTTP.keytab</value>
</property>
<property>
<name>dfs.encrypt.data.transfer</name>
<value>true</value>
</property>
</configuration>' >> etc/hadoop/hdfs-site.xml
# passwordless ssh to the local machine
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost date # test: prints the date without opening an interactive shell, so there is nothing to log out of
ssh myli date
ssh 0.0.0.0 date
ssh 192.168.127.131 date
ssh kerberos.example.com date
bin/hdfs namenode -format
sbin/start-dfs.sh
# run the next few commands as root
service iptables stop
cd /home/hdfs/hadoop-2.7.1
sbin/hadoop-daemon.sh start datanode # with Kerberos enabled the DataNode binds privileged ports (1004/1006), so it is started separately, as root, through jsvc
# error logs:
tail logs/jsvc.err
tail logs/hadoop-hdfs-datanode-myli.log
tail logs/hadoop-hdfs-namenode-myli.log
jps # expect three processes: Jps, NameNode, and one with an empty name (the DataNode launched through jsvc)
kinit -kt ~/hdfs.keytab hdfs/myli@EXAMPLE.COM # hdfs commands only work after logging in to Kerberos
bin/hdfs dfs -ls /
bin/hdfs dfs -put README.txt /
bin/hdfs dfs -put README.txt /rrr.txt
bin/hdfs dfsadmin -report
sbin/hadoop-daemon.sh stop datanode # root
sbin/stop-dfs.sh
# test the connection with a Java test controller
curl 'http://172.24.1.24:8080/hdfs?path=/&user=hdfs/myli@EXAMPLE.COM&url=hdfs://192.168.127.131:9000&keyTab=hdfs.keytab'
curl 'http://172.24.1.24:8080/rHdfs?filePath=/README.txt'
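The test controller above is the author's own service; under the hood any Java client has to do a keytab login through UserGroupInformation before opening a FileSystem. A minimal sketch under that assumption (class name and local paths are illustrative, not from the original post; the config keys are standard Hadoop client settings):
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosHdfsClient {
    public static void main(String[] args) throws Exception {
        // Values mirror this guide's environment; adjust for yours.
        String url = "hdfs://192.168.127.131:9000";
        String principal = "hdfs/myli@EXAMPLE.COM";
        String keytab = "/path/to/hdfs.keytab";   // keytab copied to the client machine (illustrative path)

        // The client machine needs a krb5.conf that knows the EXAMPLE.COM realm
        // and must be able to reach kerberos.example.com (hosts entry or DNS).
        System.setProperty("java.security.krb5.conf", "/etc/krb5.conf");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", url);
        conf.set("hadoop.security.authentication", "kerberos");
        // Tell the client which principal the NameNode runs as
        // (same principal as the login in this single-node setup).
        conf.set("dfs.namenode.kerberos.principal", principal);

        // Log in from the keytab, then use HDFS as usual.
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(principal, keytab);

        FileSystem fs = FileSystem.get(URI.create(url), conf);
        for (FileStatus st : fs.listStatus(new Path("/"))) {
            System.out.println(st.getPath());
        }
        fs.close();
    }
}
Listing the root only exercises the NameNode RPC; reuse the put/get tests above on the same FileSystem if you also want to exercise DataNode traffic.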
# if the namenode has been formatted more than once and the clusterIDs no longer match, compare:
cat /tmp/hadoop-hdfs/dfs/data/current/VERSION
cat /tmp/hadoop-root/dfs/name/current/VERSION
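If the two clusterID values differ, either edit the datanode's VERSION file so its clusterID matches the namenode's, or, since this is a throwaway test box, wipe the datanode data dir and start the datanode again (as root):
rm -rf /tmp/hadoop-hdfs/dfs/data # deletes all block data - test environments only
sbin/hadoop-daemon.sh start datanode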
Points to note in the configuration
- Map the hostname to the externally reachable IP, never 127.0.0.1 or localhost, because Hadoop uses the hostname as the node address.
- In core-site.xml, fs.defaultFS should use the reachable IP; this is the address external applications use to connect to HDFS.
- In hdfs-site.xml, dfs.datanode.address should bind 0.0.0.0; if put fails, it is usually a NameNode/DataNode communication problem.
Source: CSDN
Author: qianggetaba
Link: https://blog.csdn.net/c5113620/article/details/104051134