Author: haizdl · 2016-05-30 21:31

Technical Manager · Dalian

Hadoop, Zookeeper, HBase, and Hive Distributed Cluster Setup Example


1 Basic Architecture

[Figure 1: 图片1.png (basic architecture)]

2 Operating Mechanism

[Figure 2: 图片2.png (operating mechanism)]

3 Hadoop Cluster Setup

----------------------------------------

Environment

----------------------------------------

Red Hat Linux 7.1 (Node50 = NameNode; Node51-53 = DataNodes)

hadoop-2.7.1.tar.gz

jdk-8u77-linux-x64.tar.gz

----------------------------------------

Environment configuration

----------------------------------------

1. Add the hadoop application user (Node50 & Node51 & Node52 & Node53).

# groupadd hadoop

# useradd -G hadoop hadoop

# passwd hadoop

2. Configure SSH mutual trust (passwordless login); a quick verification follows at the end of this step.

node50-node53: "/etc/hosts"

192.168.239.50 node50

192.168.239.51 node51

192.168.239.52 node52

192.168.239.53 node53

"node50-node53: /home/hadoop/.ssh/authorized_keys"

# su - hadoop

# ssh-keygen -t rsa

# cd /home/hadoop/.ssh

# cp id_rsa.pub authorized_keys

"node50:"

# scp hadoop@node51:/home/hadoop/.ssh/id_rsa.pub /tmp/id_rsa.pub.node51

# scp hadoop@node52:/home/hadoop/.ssh/id_rsa.pub /tmp/id_rsa.pub.node52

# scp hadoop@node53:/home/hadoop/.ssh/id_rsa.pub /tmp/id_rsa.pub.node53

# cat /tmp/id_rsa.pub.node51 >> /home/hadoop/.ssh/authorized_keys

# cat /tmp/id_rsa.pub.node52 >> /home/hadoop/.ssh/authorized_keys

# cat /tmp/id_rsa.pub.node53 >> /home/hadoop/.ssh/authorized_keys

# scp /home/hadoop/.ssh/authorized_keys hadoop@node51:/home/hadoop/.ssh/authorized_keys

# scp /home/hadoop/.ssh/authorized_keys hadoop@node52:/home/hadoop/.ssh/authorized_keys

# scp /home/hadoop/.ssh/authorized_keys hadoop@node53:/home/hadoop/.ssh/authorized_keys
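A quick check that the trust relationship works (a minimal sanity test run as the hadoop user on node50; the date command is just an arbitrary remote command, and the same test can be repeated from the DataNodes towards node50):

// Each command should print the remote node's date without prompting for a password.

# ssh hadoop@node51 date

# ssh hadoop@node52 date

# ssh hadoop@node53 date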

----------------------------------------

Installation preparation

----------------------------------------

"node50: 解压&修改权限"

# cd /home/hadoop

# tar -zxvf hadoop-2.7.1.tar.gz

# tar -zxvf jdk-8u77-linux-x64.tar.gz

# chown -R hadoop:hadoop hadoop-2.7.1

# chown -R hadoop:hadoop jdk1.8.0_77

# mv jdk1.8.0_77 ./hadoop-2.7.1/java

"node50-node53: /home/hadoop/.bashrc"

...

# Hadoop Config

export JAVA_HOME=/home/hadoop/hadoop-2.7.1/java

export JRE_HOME=$JAVA_HOME/jre

export CLASSPATH=.:$JRE_HOME/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:/home/hadoop/hadoop-2.7.1/bin:/home/hadoop/hadoop-2.7.1/sbin

export JAVA_LIBRARY_PATH=/home/hadoop/hadoop-2.7.1/lib/native
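After editing .bashrc, a quick sanity check confirms the variables took effect (run as the hadoop user on node50; on the other nodes this only works after the copy step later in this section):

# su - hadoop

# source ~/.bashrc

# java -version       // should report 1.8.0_77

# hadoop version      // should report Hadoop 2.7.1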

----------------------------------------

Configuration preparation (Node50)

----------------------------------------

# su - hadoop

# cd /home/hadoop/hadoop-2.7.1

# mkdir dfs

# mkdir ./dfs/name

# mkdir ./dfs/data

# mkdir tmp

----------------------------------------

Cluster configuration (Node50)

----------------------------------------

1) /home/hadoop/hadoop-2.7.1/etc/hadoop/core-site.xml

<configuration>

<property>

               <name>hadoop.tmp.dir</name>

               <value>file:/home/hadoop/hadoop-2.7.1/tmp</value>

</property>

<property>

               <name>fs.defaultFS</name>

               <value>hdfs://node50:9000</value>

</property>

<property>

               <name>io.native.lib.available</name>

               <value>true</value>

</property>

</configuration>

2) /home/hadoop/hadoop-2.7.1/etc/hadoop/hdfs-site.xml

<configuration>

<property>

<name>dfs.namenode.name.dir</name>

<value>file:/home/hadoop/hadoop-2.7.1/dfs/name</value>

</property>

<property>

<name>dfs.datanode.data.dir</name>

<value>file:/home/hadoop/hadoop-2.7.1/dfs/data</value>

</property>

<property>

<name>dfs.replication</name>

<value>3</value>

</property>

<property>

<name>dfs.namenode.secondary.http-address</name>

<value>node50:9001</value>

</property>

<property>

<name>dfs.webhdfs.enabled</name>

<value>true</value>

</property>

</configuration>

3) /home/hadoop/hadoop-2.7.1/etc/hadoop/mapred-site.xml (copy mapred-site.xml.template to this name first)

<configuration>

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

<property>

<name>mapreduce.jobhistory.address</name>

<value>node50:10020</value>

</property>

<property>

<name>mapreduce.jobhistory.webapp.address</name>

<value>node50:19888</value>

</property>

</configuration>

4) /home/hadoop/hadoop-2.7.1/etc/hadoop/yarn-site.xml

<configuration>

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>

<value>org.apache.hadoop.mapred.ShuffleHandler</value>

</property>

<property>

<name>yarn.resourcemanager.address</name>

<value>node50:8032</value>

</property>

<property>

<name>yarn.resourcemanager.scheduler.address</name>

<value>node50:8030</value>

</property>

<property>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>node50:8031</value>

</property>

<property>

<name>yarn.resourcemanager.admin.address</name>

<value>node50:8033</value>

</property>

<property>

<name>yarn.resourcemanager.webapp.address</name>

<value>node50:8088</value>

</property>

</configuration>

5) /home/hadoop/hadoop-2.7.1/etc/hadoop/slaves

node51

node52

node53

6) /home/hadoop/hadoop-2.7.1/etc/hadoop/hadoop-env.sh

...

# The java implementation to use.

export JAVA_HOME=/home/hadoop/hadoop-2.7.1/java

...

7) /home/hadoop/hadoop-2.7.1/etc/hadoop/yarn-env.sh

...

# some Java parameters

# export JAVA_HOME=/home/y/libexec/jdk1.6.0/

export JAVA_HOME=/home/hadoop/hadoop-2.7.1/java

...

----------------------------------------

Copy the configuration (Node50 & Node51 & Node52 & Node53)

----------------------------------------

# Copy the Hadoop directory (including its configuration) and the environment file (.bashrc) to all nodes via scp, for example as sketched below.
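A sketch of the copy, run as the hadoop user on node50 and assuming the layout used above:

# scp -r /home/hadoop/hadoop-2.7.1 hadoop@node51:/home/hadoop/

# scp -r /home/hadoop/hadoop-2.7.1 hadoop@node52:/home/hadoop/

# scp -r /home/hadoop/hadoop-2.7.1 hadoop@node53:/home/hadoop/

# scp /home/hadoop/.bashrc hadoop@node51:/home/hadoop/.bashrc

# scp /home/hadoop/.bashrc hadoop@node52:/home/hadoop/.bashrc

# scp /home/hadoop/.bashrc hadoop@node53:/home/hadoop/.bashrc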

----------------------------------------

Start the cluster (Node50)

----------------------------------------

1) Format HDFS.

# hdfs namenode -format

2) Start the Hadoop cluster.

# /home/hadoop/hadoop-2.7.1/sbin/start-dfs.sh

# /home/hadoop/hadoop-2.7.1/sbin/start-yarn.sh

----------------------------------------

Cluster verification (Node50 & Node51 & Node52 & Node53)

----------------------------------------

1)http://192.168.239.50:50070/

2)http://192.168.239.50:8088/

// namenode

# hadoop dfsadmin -report

# jps

// datanode

# jps

// namenode

# hadoop fs -ls /

# hadoop fs -mkdir /test
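Beyond listing directories, a small end-to-end job confirms that HDFS writes and YARN scheduling both work (a sketch; the examples jar path follows the 2.7.1 layout, and /test is the directory created above):

# hadoop fs -put /home/hadoop/hadoop-2.7.1/etc/hadoop/core-site.xml /test/

# hadoop jar /home/hadoop/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /test /test-out

# hadoop fs -cat /test-out/part-r-00000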

----------------------------------------

Points to note

----------------------------------------

 1 【A warning is reported during operations】

 “WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform… using builtin-java classes where applicable”

 

 【Diagnostic steps】

 // Enable DEBUG-level logging.

 # export HADOOP_ROOT_LOGGER=DEBUG,console

 # hadoop fs -ls /

 

 // Locate the error:

 “NativeCodeLoader:/lib/libc.so.6: version `GLIBC_2.14' not found”

 ==> Preliminary judgment: this is a library issue.

 

 # rpm -qa | grep glibc

 ==> Confirms that the OS library version is indeed somewhat old.

 

 # strings /lib64/libc.so.6 |grep GLIBC_

 ==> The list of GLIBC versions supported by the OS does contain 2.14.

 ==> So there is nothing else to consider: download the new version, then compile and install it.

2 DataNodes fail to start

 【Symptom】

 // After the cluster starts, the web UI on port 50070 shows no DataNode information, while jps on the DataNodes shows the services are running.

 

 【Diagnostic steps】

 // Check the slaves configuration file.

==> Make sure every DataNode has been added to the file.

 

 // Check the DataNode log:

 /home/hadoop/hadoop-2.7.1/logs/hadoop-hadoop-datanode-node51.log

 ==> Locate the error:

 "node50:9000 connection error: refused ......"

 

 # netstat -an | grep 9000

 ==> Something looks odd:

 “tcp        0      0 127.0.0.1:9000     0.0.0.0:*               LISTEN   ”

 ==> Why is it listening on the loopback address?

 

 # cat /etc/hosts

 ==> The root cause is found:

 127.0.0.1 node50

 ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

 192.168.239.50 node50

 192.168.239.51 node51

 192.168.239.52 node52

 192.168.239.53 node53

 ==> Change the first line back to:

 “127.0.0.1 localhost localhost.localdomain localhost6 localhost6.localdomain6”
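After correcting /etc/hosts on node50, restarting HDFS and re-checking the listener should show the NameNode bound to the real interface (a quick re-check; the expected address follows the plan above):

# stop-dfs.sh

# start-dfs.sh

# netstat -an | grep 9000

// expected: tcp ... 192.168.239.50:9000 ... LISTEN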

4 Zookeeper Cluster Setup

----------------------------------------

Environment

----------------------------------------

Red Hat Linux 7.1 (Node50-53)

zookeeper-3.4.8.tar.gz

----------------------------------------

Installation preparation (Node50)

----------------------------------------

# tar -zxvf zookeeper-3.4.8.tar.gz

# chown -R hadoop:hadoop zookeeper-3.4.8

# mv zookeeper-3.4.8 /home/hadoop/

----------------------------------------

Environment configuration (Node50 & Node51 & Node52 & Node53)

----------------------------------------

/home/hadoop/.bashrc

...

# Zookeeper Config

...

export ZOOKEEPER_HOME=/home/hadoop/zookeeper-3.4.8

export PATH=$PATH:$ZOOKEEPER_HOME/bin

----------------------------------------

Configuration preparation (Node50)

----------------------------------------

# su - hadoop

# cd /home/hadoop/zookeeper-3.4.8

# mkdir data

# mkdir log

----------------------------------------

Cluster configuration (Node50)

----------------------------------------

1) /home/hadoop/zookeeper-3.4.8/conf/zoo.cfg

# The number of milliseconds of each tick

tickTime=2000

# The number of ticks that the initial 

# synchronization phase can take

initLimit=10

# The number of ticks that can pass between 

# sending a request and getting an acknowledgement

syncLimit=5

# the directory where the snapshot is stored.

# do not use /tmp for storage, /tmp here is just 

# example sakes.

dataDir=/home/hadoop/zookeeper-3.4.8/data

dataLogDir=/home/hadoop/zookeeper-3.4.8/log

# the port at which the clients will connect

clientPort=2181

# The number of snapshots to retain in dataDir

#autopurge.snapRetainCount=3

# Purge task interval in hours

# Set to "0" to disable auto purge feature

#autopurge.purgeInterval=1

server.1=node50:2888:3888

server.2=node51:2888:3888

server.3=node52:2888:3888

server.4=node53:2888:3888

2) /home/hadoop/zookeeper-3.4.8/data/myid

1

==> Note: each server uses its own ID (node50 is 1, node51 is 2, node52 is 3, node53 is 4); see the sketch below.
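Once the Zookeeper directory has been copied to each node, the IDs can be written like this (a small sketch following the numbering in the note above):

// on node50

# echo 1 > /home/hadoop/zookeeper-3.4.8/data/myid

// on node51

# echo 2 > /home/hadoop/zookeeper-3.4.8/data/myid

// on node52

# echo 3 > /home/hadoop/zookeeper-3.4.8/data/myid

// on node53

# echo 4 > /home/hadoop/zookeeper-3.4.8/data/myid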

----------------------------------------

Copy the configuration (Node50 & Node51 & Node52 & Node53)

----------------------------------------

# Copy the Zookeeper directory and the environment file (.bashrc) to all nodes via scp.

----------------------------------------

Start the cluster (Node50 & Node51 & Node52 & Node53)

----------------------------------------

# zkServer.sh start

----------------------------------------

Cluster verification (Node50 & Node51 & Node52 & Node53)

----------------------------------------

// Node50:

[hadoop@node50 ~]$ jps

3446 QuorumPeerMain    <== this one appears

2711 DataNode

3064 ResourceManager

2603 NameNode

2878 SecondaryNameNode

4478 Jps

// Node51-53:

[hadoop@node52 ~]$ jps

2356 DataNode

2501 QuorumPeerMain    <== this one appears

2942 Jps
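Besides jps, zkServer.sh itself reports each node's role; with the quorum configured above, one node should be the leader and the others followers (output format as printed by the 3.4.x scripts):

# zkServer.sh status

// Mode: leader     (on exactly one node)

// Mode: follower   (on the other nodes)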

5 HBase Setup

----------------------------------------

Environment

----------------------------------------

Red Hat Linux 7.1 (Node50-53)

hbase-1.1.4.tar.gz

jdk-8u77-linux-x64.tar.gz

----------------------------------------

Installation preparation (Node50)

----------------------------------------

# tar -zxvf hbase-1.1.4.tar.gz

# chown -R hadoop:hadoop hbase-1.1.4

# mv hbase-1.1.4 /home/hadoop/

----------------------------------------

Configuration preparation (Node50)

----------------------------------------

# su - hadoop

# cd /home/hadoop/hbase-1.1.4

# mkdir data

----------------------------------------

Cluster configuration (Node50)

----------------------------------------

1) /home/hadoop/hbase-1.1.4/conf/hbase-site.xml

<configuration>

   <property>

           <name>hbase.rootdir</name>  

           <value>hdfs://node50:9000/hbase</value>  

   </property>

   <property>

           <name>hbase.cluster.distributed</name>

           <value>true</value>

   </property>

   <property>

           <name>hbase.zookeeper.quorum</name>

           <value>node50,node51,node52,node53</value>

   </property>

   <property>  

           <name>dfs.replication</name>

           <value>3</value>

   </property>

   <property>  

           <name>hbase.zookeeper.property.dataDir</name>

           <value>/home/hadoop/zookeeper-3.4.8/data</value>

   </property>

</configuration>

2) /home/hadoop/hbase-1.1.4/conf/hbase-env.sh

...

# The java implementation to use.  Java 1.7+ required.

export JAVA_HOME=/home/hadoop/hadoop-2.7.1/java

# see http://wiki.apache.org/hadoop/PerformanceTuning

export HBASE_OPTS="-XX:+UseConcMarkSweepGC"

# Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+

export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"

export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"

# export HBASE_SLAVE_SLEEP=0.1

# Tell HBase whether it should manage it's own instance of Zookeeper or not.

export HBASE_MANAGES_ZK=false

3) /home/hadoop/hbase-1.1.4/conf/regionservers

node51

node52

node53

----------------------------------------

Copy the configuration (Node50 & Node51 & Node52 & Node53)

----------------------------------------

# Copy the HBase directory and the environment file (.bashrc) to all nodes via scp.

----------------------------------------

Start the cluster (Node50)

----------------------------------------

# /home/hadoop/hbase-1.1.4/bin/start-hbase.sh

----------------------------------------

Cluster verification (Node50 & Node51 & Node52 & Node53)

----------------------------------------

// Node50:

[hadoop@node50 logs]$ jps

5396 Jps

3446 QuorumPeerMain

2711 DataNode

3064 ResourceManager

2603 NameNode

2878 SecondaryNameNode

3599 HMaster     <===

// Node51-53:

[hadoop@node52 conf]$ jps

3346 Jps

2356 DataNode

2501 QuorumPeerMain

2665 HRegionServer     <===
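A short hbase shell session is a useful smoke test of the HMaster and region servers (a minimal sketch; the table name t1 and column family cf1 are arbitrary examples):

# /home/hadoop/hbase-1.1.4/bin/hbase shell

hbase> status

hbase> create 't1', 'cf1'

hbase> put 't1', 'row1', 'cf1:a', 'value1'

hbase> scan 't1'

hbase> disable 't1'

hbase> drop 't1'

hbase> quit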

6 Hive Setup

----------------------------------------

Environment

----------------------------------------

Red Hat Linux 7.1 (Node50)

apache-hive-2.0.0-bin.tar.gz

mysql-5.7.12-1.el7.x86_64.rpm-bundle.tar

----------------------------------------

Installation preparation

----------------------------------------

# tar -xvf mysql-5.7.12-1.el7.x86_64.rpm-bundle.tar

# rpm -Uvh mysql-community-common-5.7.12-1.el7.x86_64.rpm

# rpm -Uvh mysql-community-libs-5.7.12-1.el7.x86_64.rpm

# rpm -Uvh mysql-community-libs-compat-5.7.12-1.el7.x86_64.rpm

# rpm -Uvh mysql-community-client-5.7.12-1.el7.x86_64.rpm

# rpm -Uvh mysql-community-server-5.7.12-1.el7.x86_64.rpm

# tar -zxvf apache-hive-2.0.0-bin.tar.gz

# chown -R hadoop:hadoop apache-hive-2.0.0-bin

# mv apache-hive-2.0.0-bin /home/hadoop/hive-2.0.0

----------------------------------------

Database configuration

----------------------------------------

# systemctl start mysqld.service

// Get the temporary password generated by MySQL.

# grep 'temporary password' /var/log/mysqld.log

// Log in with the temporary password.

# mysql -u root -p

// Change the root password (run inside the mysql client).

mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY '$newpassword';

// Create the database and grant privileges.

mysql> create database hive;

mysql> grant all on hive.* to 'root'@'%' identified by 'ROOT@root@123';

mysql> grant all on *.* to 'root'@'%' identified by 'ROOT@root@123';

// This step is very important; otherwise Hive will get errors when connecting to MySQL.

# systemctl restart mysqld.service

----------------------------------------

Hive configuration

----------------------------------------

1) /home/hadoop/hive-2.0.0/conf/hive-env.sh

...

export HADOOP_HEAPSIZE=1024

HADOOP_HOME=/home/hadoop/hadoop-2.7.1

export HIVE_CONF_DIR=/home/hadoop/hive-2.0.0/conf

export HIVE_AUX_JARS_PATH=/home/hadoop/hive-2.0.0/lib

...

// Modify the entries above.

2) /home/hadoop/hive-2.0.0/conf/hive-site.xml

<configuration>  

<property>  

<name>javax.jdo.option.ConnectionURL</name>  

<value>jdbc:mysql://node50:3306/hive?createDatabaseIfNotExist=true</value>

</property>  

<property>  

<name>javax.jdo.option.ConnectionDriverName</name>  

<value>com.mysql.jdbc.Driver</value>

</property>  

<property>  

<name>javax.jdo.option.ConnectionUserName</name>  

<value>root</value>  

</property>  

<property>  

<name>javax.jdo.option.ConnectionPassword</name>  

<value>ROOT@root@123</value>  

</property>  

</configuration>  

----------------------------------------

Environment variables

----------------------------------------

1) /home/hadoop/.bashrc

...

# Hive Config

export HIVE_HOME=/home/hadoop/hive-2.0.0

export PATH=$PATH:$HIVE_HOME/bin

2) Copy the MySQL JDBC driver jar into Hive's lib directory.

# cp mysql-connector-java-5.1.18-bin.jar /home/hadoop/hive-2.0.0/lib/

----------------------------------------

Hive startup & verification

----------------------------------------

# schematool -dbType mysql -initSchema

# hive

hive> show tables;
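A slightly fuller check than show tables is to create a table and confirm that its metadata lands in MySQL and its directory appears in HDFS (a sketch; the table name t_demo is an arbitrary example, and /user/hive/warehouse is Hive's default warehouse location):

hive> create table t_demo (id int, name string);

hive> show tables;

hive> quit;

# hadoop fs -ls /user/hive/warehouse

# mysql -u root -p -e "use hive; show tables;"    // the metastore tables (TBLS, DBS, ...) should be listed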

