centos7 hadoop-2.7.3完全分布式安装配置
操作系统版本:
centos7 64位
hadoop版本:
hadoop-2.7.3
hbase版本:
hbase-1.2.4
机器:
192.168.11.131 master1 Namenode ResourceManager QuorumPeerMain Jobhistory HMaster DFSZKFailoverController
192.168.11.132 master2 Namenode QuorumPeerMain HMaster DFSZKFailoverController
192.168.11.133 slave1 Datanode QuorumPeerMain HRegionServer NodeManager JournalNode
192.168.11.134 slave2 Datanode QuorumPeerMain HRegionServer NodeManager JournalNode
192.168.11.135 slave3 Datanode QuorumPeerMain HRegionServer NodeManager JournalNode
所有节点关闭防火墙及selinux
# firewall-cmd --state
running
# systemctl stop firewalld.service
# systemctl disable firewalld.service
# setenforce 0
# vi /etc/sysconfig/selinux
SELINUX=enforcing --> disabled
所有节点配置yum源
# cd
# mkdir apps
http://mirrors.163.com/centos/7/os/x86_64/Packages/wget-1.14-15.el7.x86_64.rpm
# rpm -i wget-1.14-15.el7.x86_64.rpm
# cd /etc/yum.repos.d
# wget http://mirrors.aliyun.com/repo/Centos-7.repo
# mv Centos-7.repo CentOS-Base.repo
# scp CentOS-Base.repo root@192.168.11.131:/etc/yum.repos.d/
# scp CentOS-Base.repo root@192.168.11.132:/etc/yum.repos.d/
# scp CentOS-Base.repo root@192.168.11.133:/etc/yum.repos.d/
# scp CentOS-Base.repo root@192.168.11.134:/etc/yum.repos.d/
# yum clean all
# yum makecache
# yum update
配置ntp时间同步
所有节点安装ntp
# yum install -y ntp
ntp server端:
# date -s "2018-05-27 23:03:30"
# vi /etc/ntp.conf
在注释下添加两行
#restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap
server 127.127.1.0
fudge 127.127.1.0 stratum 11
注释下面
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
# systemctl start ntpd.service
# systemctl enable ntpd.service
ntp客户端(其余四台都为ntp客户端):
# vi /etc/ntp.conf
同样注释下添加两行
#restrict 192.168.1.0 mask 255.255.255.0 nomodify notrap
server 192.168.11.131
fudge 127.127.1.0 stratum 11
四行添加注释
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
# systemctl start ntpd.service
# systemctl enable ntpd.service
# ntpdate 192.168.11.131
28 May 07:04:50 ntpdate[1714]: the NTP socket is in use, exiting
# lsof -i:123
-bash: lsof: command not found
# yum install -y lsof
# lsof -i:123
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ntpd 1693 ntp 16u IPv4 25565 0t0 UDP *:ntp
ntpd 1693 ntp 17u IPv6 25566 0t0 UDP *:ntp
ntpd 1693 ntp 18u IPv4 25572 0t0 UDP localhost:ntp
ntpd 1693 ntp 19u IPv4 25573 0t0 UDP localhost.localdomain:ntp
ntpd 1693 ntp 20u IPv6 25574 0t0 UDP localhost:ntp
ntpd 1693 ntp 21u IPv6 25575 0t0 UDP localhost.localdomain:ntp
# kill -9 1693
# ntpdate 192.168.11.131
27 May 23:06:14 ntpdate[1728]: step time server 192.168.11.131 offset -28808.035509 sec
# date
Sun May 27 23:06:17 CST 2018
所有节点修改主机名(永久)
# hostnamectl set-hostname master1~slave3
临时修改主机名
# hostname master1~slave3
主节点修改hosts文件
# vi /etc/hosts
192.168.11.131 master1
192.168.11.132 master2
192.168.11.133 slave1
192.168.11.134 slave2
192.168.11.135 slave3
把hosts文件覆盖到其他机器
# scp /etc/hosts root@192.168.11.132~135:/etc/
所有节点创建管理用户和组
创建组和用户
# groupadd hduser
# useradd -g hduser hduser
# passwd hduser
创建目录并赋权
每台机器上创建如下文件夹
# mkdir /data1
# mkdir /data2
修改权限
# chown hduser:hduser /data1
# chown hduser:hduser /data2
# su hduser
$ mkdir -p /data1/hadoop_data/hdfs/namenode
$ mkdir -p /data2/hadoop_data/hdfs/namenode
$ mkdir -p /data1/hadoop_data/hdfs/datanode   # NameNode节点不需要创建
$ mkdir -p /data2/hadoop_data/hdfs/datanode   # NameNode节点不需要创建
$ mkdir -p /data1/hadoop_data/pids
$ mkdir -p /data2/hadoop_data/pids
$ mkdir -p /data1/hadoop_data/hadoop_tmp
$ mkdir -p /data2/hadoop_data/hadoop_tmp
无密验证
master1和master2节点操作
# su - hduser
$ ssh-keygen -t rsa
$ cd ~/.ssh
$ cat id_rsa.pub >> authorized_keys
master1节点操作
$ ssh-copy-id -i ~/.ssh/id_rsa.pub hduser@master2
master2节点操作
$ scp ~/.ssh/authorized_keys hduser@master1:~/.ssh/
slave1、slave2和slave3节点创建.ssh目录
# mkdir /home/hduser/.ssh
# chown hduser:hduser /home/hduser/.ssh
master1节点操作
$ scp ~/.ssh/authorized_keys hduser@slave1:~/.ssh
$ scp ~/.ssh/authorized_keys hduser@slave2:~/.ssh
$ scp ~/.ssh/authorized_keys hduser@slave3:~/.ssh
master1和master2节点验证
验证方法:分别在两个节点上,以hduser用户ssh登录本机及其他四个节点,确认是否可以免密登录。
如果未通过验证,所有机器执行下面命令
$ chmod 600 ~/.ssh/authorized_keys
$ chmod 700 ~/.ssh
所有节点配置java环境
$ mkdir -p /data1/usr/src
上传包到/data1/usr/src目录下
$ cd /data1/usr/src
$ tar xf jdk1.7.0_79.tar -C /data1/usr/
$ vi ~/.bashrc
export JAVA_HOME=/data1/usr/jdk1.7.0_79
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib/rt.jar
export PATH=$PATH:$JAVA_HOME/bin
$ source ~/.bashrc
master1节点配置hadoop(hduser用户)
下载hadoop-2.7.3.tar.gz,上传到/data1/usr/src
http://mirrors.cnnic.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
$ cd /data1/usr/src
$ tar -zxf hadoop-2.7.3.tar.gz -C /data1/usr/
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/data1/usr/jdk1.7.0_79
export HADOOP_PID_DIR=/data2/hadoop_data/pids
export HADOOP_MAPRED_PID_DIR=/data1/hadoop_data/pids
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/mapred-env.sh
export HADOOP_MAPRED_PID_DIR=/data2/hadoop_data/pids
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/yarn-env.sh
export YARN_PID_DIR=/data2/hadoop_data/pids
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/core-site.xml
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/yarn-site.xml
$ cp /data1/usr/hadoop-2.7.3/etc/hadoop/mapred-site.xml.template /data1/usr/hadoop-2.7.3/etc/hadoop/mapred-site.xml
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/mapred-site.xml
$ vi /data1/usr/hadoop-2.7.3/etc/hadoop/slaves
slave1
slave2
slave3
$ for ip in `seq 2 5`;do scp -rpq /data1/usr/hadoop-2.7.3 192.168.11.13$ip:/data1/usr;done
各节点zookeeper配置
http://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
上传包到/data1/usr/src目录下
创建目录
$ mkdir -p /home/hduser/storage/zookeeper
$ cd /data1/usr/src
$ tar -zxf zookeeper-3.4.6.tar.gz -C /data1/usr
$ cp /data1/usr/zookeeper-3.4.6/conf/zoo_sample.cfg /data1/usr/zookeeper-3.4.6/conf/zoo.cfg
$ vi /data1/usr/zookeeper-3.4.6/conf/zoo.cfg
dataDir=/home/hduser/storage/zookeeper
server.1=master1:2888:3888
server.2=master2:2888:3888
server.3=slave1:2888:3888
server.4=slave2:2888:3888
server.5=slave3:2888:3888
master1-slave3各节点依次做操作
$ echo "1" > /home/hduser/storage/zookeeper/myid
$ echo "2" > /home/hduser/storage/zookeeper/myid
$ echo "3" > /home/hduser/storage/zookeeper/myid
$ echo "4" > /home/hduser/storage/zookeeper/myid
$ echo "5" > /home/hduser/storage/zookeeper/myid
$ cd /data1/usr/zookeeper-3.4.6/bin
$ ./zkServer.sh start
slave1、slave2和slave3启动journalnode
$ cd /data1/usr/hadoop-2.7.3
$ ./sbin/hadoop-daemon.sh start journalnode
用jps确认启动结果
在master1上格式化zookeeper节点(仅第一次)
$ cd /data1/usr/hadoop-2.7.3
$ ./bin/hdfs zkfc -formatZK
在master1上格式化namenode(仅第一次):
./bin/hadoop namenode -format
在master1上启动namenode
./sbin/hadoop-daemon.sh start namenode
需要在master2(备节点)上执行数据同步
./bin/hdfs namenode -bootstrapStandby
scp -r /data2/hadoop_data/hdfs/namenode hduser@master2:/data2/hadoop_data/hdfs/
在master2上启动namenode
./sbin/hadoop-daemon.sh start namenode
设置master1为active
./bin/hdfs haadmin -transitionToActive master1
./bin/hdfs haadmin -getServiceState master1
在master1上启动datanode
./sbin/hadoop-daemons.sh start datanode
启动HDFS(第二次以后)
在master1上执行命令:
./sbin/start-dfs.sh
启动YARN
在master1上执行命令:
./sbin/start-yarn.sh
验证
验证namenode
http://master1:50070
Overview 'master1:9000' (active)
http://master2:50070
Overview 'master2:9000' (standby)
上传文件
./bin/hadoop fs -put /data1/usr/hadoop-2.7.3/etc/hadoop /test
./bin/hadoop fs -ls /test
namenode的备份验证
杀死master1,master2变为active
验证yarn
./bin/hadoop jar /data1/usr/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /test/hadoop /test/out
安装HBASE
master1节点操作:
下载hbase-1.2.4-bin.tar.gz,解压
$ cd /data1/usr/src
$ tar -zxvf hbase-1.2.4-bin.tar.gz -C /data1/usr/
$ mkdir -p /data1/hadoop_data/hbase_tmp
$ mkdir -p /data2/hadoop_data/hbase_tmp
配置master1的hbase环境
配置hbase-env.sh
$ vi /data1/usr/hbase-1.2.4/conf/hbase-env.sh
export JAVA_HOME=/data1/usr/jdk1.7.0_79
export HBASE_PID_DIR=/data2/hadoop_data/pids
export HBASE_MANAGES_ZK=false
export HADOOP_HOME=/data1/usr/hadoop-2.7.3
配置hbase-site.xml
$ vi /data1/usr/hbase-1.2.4/conf/hbase-site.xml
配置regionservers
$ vi /data1/usr/hbase-1.2.4/conf/regionservers
slave1
slave2
slave3
配置backup-masters
$ vi /data1/usr/hbase-1.2.4/conf/backup-masters
master2
移除 HBase 里面的不必要 log4j 的 jar 包
cd /data1/usr/hbase-1.2.4/lib
mv slf4j-log4j12-1.7.5.jar slf4j-log4j12-1.7.5.jar.bak
将master1的hbase环境传输到其他节点
$ for ip in `seq 2 5`;do scp -rpq /data1/usr/hbase-1.2.4 192.168.11.13$ip:/data1/usr;done
启动顺序
按hadoop集群的启动步骤,启动hadoop集群
master1上启动Hbase
$ cd /data1/usr/hbase-1.2.4/bin
$ ./start-hbase.sh
验证
$ /data1/usr/hadoop-2.7.3/bin/hadoop fs -ls / 查看hbase是否在HDFS文件系统创建成功
执行: ./hbase shell 可以进入HBase shell命令行界面,
输入 status 查看状态
创建表
create 'test', 'cf'
显示表信息
list 'test'
表中插入数据
put 'test', 'row1', 'cf:a', 'value1'
put 'test', 'row2', 'cf:b', 'value2'
put 'test', 'row3', 'cf:c', 'value3'
查询表
scan 'test'
取一行数据
get 'test', 'row1'
失效表
disable 'test'
删除表
drop 'test'
浏览器输入http://master1:16010可以打开Hbase管理界面
http://192.168.11.131:16010/master-status
启动thrift2
hbase-daemons.sh start thrift2
去datanode节点用jps确认