搭建Hadoop-1.2.1&hbase-0.94.17&hive-0.9.0&centos6.8_x64集群

一、搭建环境
--yum install -y java-1.7.0-openjdk.x86_64
系统:CentOS6.8_x64 hadoop-1.2.1 zookeeper-3.4.6 hbase-0.94.17
下载软件包:
--hadoop-1.2.1.tar.gz
http://mirrors.cnnic.cn/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz
--zookeeper-3.4.6.tar.gz
http://mirrors.cnnic.cn/apache/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
--hbase-0.94.17.tar.gz
http://archive.apache.org/dist/hbase/hbase-0.94.17/
 
hadoop    192.168.92.17 NameNode
hadoop1 192.168.92.18 Datanode
hadoop2 192.168.92.19 Datanode
 
--通过浏览器进行查看hadoop等运行状态
--namenode
http://192.168.92.17:50070
--directory
http://192.168.92.17:50075
--tracker_hadoop
http://192.168.92.17:50060
--Map/Reduce
http://192.168.92.17:50030
--hbase
http://192.168.92.17:60010/
--hive web interface
http://192.168.92.17:9999/hwi/
 
二、搭建流程
useradd -d /home/hadoop -s /bin/bash -m hadoop
passwd hadoop
 
chkconfig iptables off
service iptables stop
vi /etc/sysconfig/selinux
SELINUX=disabled
 
--Master设置ntpdate服务
chkconfig ntpd on
service ntpd restart
vi /etc/rc.local 加入以下内容 同时确保slave节点上的ntpd已关闭
 
while [ 1 ]; do ntpdate hadoop 1>/dev/null 2>&1; sleep 2; done &
 
--设置hadoop用户的Shell Limits,用root用户登录
vi /etc/security/limits.conf 添加
hadoop  -       nofile  32768
 
--slave节点关闭ntpd
service ntpd status
chkconfig ntpd --list
chkconfig ntpd off
service ntpd stop
 
vim /etc/hosts
 
192.168.92.17   hadoop
192.168.92.18   hadoop1
192.168.92.19   hadoop2
 
--将安装包拷贝到/home/hadoop目录
cd /home/hadoop
chown hadoop.hadoop *
chmod 775 *
 
su - hadoop
--配置各节点间的hadoop用户的ssh公钥互信
--namenode datanode
--export HADOOP_HOME_WARN_SUPPRESS=1此配置是用来解决hadoop启动警告
vim ~/.bash_profile
 
JAVA_HOME=/home/hadoop/jdk1.7.0_55
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
HADOOP_HOME=/home/hadoop/hadoop-1.2.1
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export JAVA_HOME PATH HADOOP_HOME
export HADOOP_HOME_WARN_SUPPRESS=1
 
export HBASE_HOME=/home/hadoop/hbase-0.94.17
export PATH=$PATH:$HBASE_HOME/bin
ZK_HOME=/home/hadoop/zookeeper-3.4.6
PATH=$ZK_HOME/bin:$PATH
export PATH ZK_HOME
 
source ~/.bash_profile
 
mkdir ~/.ssh
--需对.ssh目录赋予700权限
chmod 700 ~/.ssh
cd .ssh
--3个节点执行
ssh-keygen -t rsa
--namenode执行
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
--cat ~/.ssh/id_rsa.pub|ssh hadoop1 'sh -c "cat - >>~/.ssh/authorized_keys"'
--cat ~/.ssh/id_rsa.pub|ssh hadoop2 'sh -c "cat - >>~/.ssh/authorized_keys"'
ssh hadoop1 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ssh hadoop2 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 644 ~/.ssh/authorized_keys
 
--复制到 datanode
scp ~/.ssh/authorized_keys hadoop@hadoop1:~/.ssh/authorized_keys
scp ~/.ssh/authorized_keys hadoop@hadoop2:~/.ssh/authorized_keys
 
--验证ssh
ssh hadoop date
ssh hadoop1 date
ssh hadoop2 date
 
--安装配置jdk 可以用jdk-6u45-linux-x64.bin 部署集群后可以更换jdk版本
cd
tar xvf jdk-7u55-linux-x64.tar.gz
--./jdk-6u45-linux-x64.bin
--复制到slave节点
scp -r jdk1.7.0_55 hadoop@hadoop1:/home/hadoop
scp -r jdk1.7.0_55 hadoop@hadoop2:/home/hadoop
 
tar -xvf hadoop-1.2.1.tar.gz
tar -xvf hbase-0.94.17.tar.gz
tar -xvf zookeeper-3.4.6.tar.gz
 
--安装hadoop
vim hadoop-1.2.1/conf/hadoop-env.sh
 
export JAVA_HOME=/home/hadoop/jdk1.7.0_55
 
--配置主配置文件
vim hadoop-1.2.1/conf/core-site.xml
 
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://192.168.92.17:9000</value>
  </property>
</configuration>
 
--修改hdfs配置文件hadoop-1.2.1/conf/hdfs-site.xml
--mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/name
--mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/data
--mkdir -p /home/hadoop/hadoop-1.2.1/data/dfs/namesecondary
 
vim hadoop-1.2.1/conf/hdfs-site.xml
 
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.name.dir</name>
    <value>/home/hadoop/hadoop-1.2.1/data/dfs/name</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/home/hadoop/hadoop-1.2.1/data/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>   <!-- 表示每个数据块保存3份副本(并非从服务器数量) -->
  </property>
</configuration>
 
--配置任务调度服务配置hadoop-1.2.1/conf/mapred-site.xml
vim hadoop-1.2.1/conf/mapred-site.xml
 
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>hdfs://192.168.92.17:9001</value> <!-- 配置到主服务器9001端口 -->
  </property>
</configuration>
 
--配置主服务器地址 hadoop-1.2.1/conf/masters
vim hadoop-1.2.1/conf/masters
hadoop
 
--配置从服务器地址 应注释192.168.92.17
vim hadoop-1.2.1/conf/slaves
hadoop1
hadoop2
   
--格式化节点服务器 注意:如果执行过程出现ERROR信息必须解决后重新格式化
hadoop namenode -format
 
14/03/27 10:46:39 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = hadoop/192.168.92.17
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 1.2.1
STARTUP_MSG:   build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
STARTUP_MSG:   java = 1.6.0_45
************************************************************/ 
14/03/27 10:46:39 INFO util.GSet: Computing capacity for map BlocksMap
14/03/27 10:46:39 INFO util.GSet: VM type       = 64-bit
14/03/27 10:46:39 INFO util.GSet: 2.0% max memory = 1013645312
14/03/27 10:46:39 INFO util.GSet: capacity      = 2^21 = 2097152 entries
14/03/27 10:46:39 INFO util.GSet: recommended=2097152, actual=2097152
14/03/27 10:46:40 INFO namenode.FSNamesystem: fsOwner=hadoop
14/03/27 10:46:40 INFO namenode.FSNamesystem: supergroup=supergroup
14/03/27 10:46:40 INFO namenode.FSNamesystem: isPermissionEnabled=true
14/03/27 10:46:40 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
14/03/27 10:46:40 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
14/03/27 10:46:40 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
14/03/27 10:46:40 INFO namenode.NameNode: Caching file names occuring more than 10 times
14/03/27 10:46:41 INFO common.Storage: Image file /tmp/hadoop-hadoop/dfs/name/current/fsimage of size 114 bytes saved in 0 seconds.
14/03/27 10:46:41 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
14/03/27 10:46:41 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/tmp/hadoop-hadoop/dfs/name/current/edits
14/03/27 10:46:41 INFO common.Storage: Storage directory /tmp/hadoop-hadoop/dfs/name has been successfully formatted.
14/03/27 10:46:41 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoop/192.168.92.17
************************************************************/
 
--拷贝hadoop-1.2.1到其他服务器
scp -r hadoop-1.2.1 hadoop@hadoop1:~
scp -r hadoop-1.2.1 hadoop@hadoop2:~
 
--启动和停止集群
start-all.sh
 
starting namenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-namenode-hadoop.out
192.168.92.18: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop1.out
192.168.92.19: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop2.out
192.168.92.17: starting datanode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-datanode-hadoop.out
192.168.92.17: starting secondarynamenode, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-secondarynamenode-hadoop.out
starting jobtracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-jobtracker-hadoop.out
192.168.92.18: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop1.out
192.168.92.19: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop2.out
192.168.92.17: starting tasktracker, logging to /home/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-hadoop-tasktracker-hadoop.out
 
jps
3749 DataNode
3629 NameNode
3972 JobTracker
4102 TaskTracker
4149 Jps
3872 SecondaryNameNode
 
jps
3690 Jps
3607 TaskTracker
3502 DataNode
 
--在主服务器上输入stop-all.sh就可以关闭整个集群
stop-all.sh
stopping jobtracker
192.168.92.18: stopping tasktracker
192.168.92.17: stopping tasktracker
192.168.92.19: stopping tasktracker
stopping namenode
192.168.92.18: stopping datanode
192.168.92.17: stopping datanode
192.168.92.19: stopping datanode
192.168.92.17: stopping secondarynamenode
 
--如果某些地址看不了,应该是windows下的hosts文件没有配置主机IP和主机名映射导致的,比如windows7下,就修改C:\Windows\System32\drivers\etc\hosts这个文件,加入主机名和IP映射
192.168.92.17   hadoop
192.168.92.18   hadoop1
192.168.92.19   hadoop2
 
---------------------------------------------------------------------
HBase在Hadoop集群下搭建过程:
在安装HBase集群前,必须先安装zookeeper。
ZooKeeper是Hadoop的正式子项目,它是一个针对大型分布式系统的可靠协调系统,提供的功能包括:配置维护、名字服务、分布式同步、组服务等。ZooKeeper的目标就是封装好复杂易出错的关键服务,将简单易用的接口和性能高效、功能稳定的系统提供给用户。Zookeeper是Google的Chubby一个开源的实现,是高有效和可靠的协同工作系统,Zookeeper能够用来leader选举,配置信息维护等,在一个分布式的环境中,需要一个Master实例或存储一些配置信息,确保文件写入的一致性等.ZooKeeper是一个分布式的,开放源码的分布式应用程序协调服务,包含一个简单的原语集,是Hadoop和Hbase的重要组件。HBase需要Zookeeper来协调HBase集群,Zookeeper Quorum中除了存储了 HBase的-ROOT-表的地址和HMaster的地址,HRegionServer也会把自己以Ephemeral方式注册到Zookeeper中,使得 HMaster可以随时感知到各个HRegionServer的健康状态。此外,Zookeeper也避免了HBase中HMaster的单点问题。
一、安装zookeeper
cd /home/hadoop/zookeeper-3.4.6/conf/
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
 
#修改此目录
dataDir=/home/hadoop/zookeeper-3.4.6/data
#添加如下内容 
server.1=192.168.92.17:2887:3887
server.2=192.168.92.18:2888:3888
server.3=192.168.92.19:2889:3889
 
输入服务器编号myid,分别为1 2 3
cd ..
mkdir data
cd data
vim myid
1
 
--拷贝数据到其他服务器
cd ~
scp -r zookeeper-3.4.6 hadoop@hadoop1:~
scp -r zookeeper-3.4.6 hadoop@hadoop2:~
--复制完成,以hadoop用户登录到其他服务器,修改zookeeper-3.4.6/data/myid 文件的内容,hadoop1服务器就改成2,hadoop2服务器就改成3
vim zookeeper-3.4.6/data/myid
2
vim zookeeper-3.4.6/data/myid
3
 
--启动验证
在hadoop2、hadoop1、hadoop上依次执行zkServer.sh start 来启动 zookeeper,所有服务器启动完成后,就可以通过zkServer.sh status来查看服务器状态。没有报错说明都正常了。输入jps可以看到服务器中多了一个QuorumPeerMain服务。
zkServer.sh start
zkServer.sh start
zkServer.sh start
 
JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
 
zkServer.sh status
 
JMX enabled by default
Using config: /home/hadoop/zookeeper-3.4.6/bin/../conf/zoo.cfg
Mode: follower
 
jps
4870 SecondaryNameNode
4625 NameNode
4746 DataNode
5102 TaskTracker
6377 Jps
5744 QuorumPeerMain
4971 JobTracker
 
二、 安装hbase
--配置数据目录、集群模式、Zookeeper服务器地址
--hbase.rootdir与hadoop的core-site.xml的fs.default.name配置一样:hdfs://hadoop:9000/hbase
vim hbase-0.94.17/conf/hbase-site.xml
 
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://hadoop:9000/hbase</value>
    <description>区域服务器使用存储HBase数据库数据的目录,服务器名称不能填IP,不然会报错</description>
  </property>
  <property>
     <name>hbase.cluster.distributed</name>
     <value>true</value>
     <description>指定HBase运行的模式:false: 单机模式或者伪分布式模式; true: 全分布式模式</description>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>hadoop,hadoop1,hadoop2</value>
    <description>ZooKeeper集群服务器的位置</description>
  </property>
</configuration>
 
--配置数据服务器地址
vim hbase-0.94.17/conf/regionservers
hadoop
hadoop1
hadoop2
 
--配置HBase中ZooKeeper使用方式
--在hbase-0.94.17/conf/hbase-env.sh文件最尾部,打开注释 export HBASE_MANAGES_ZK=true,修改true为false。意思是使用外部的ZooKeeper
vim hbase-0.94.17/conf/hbase-env.sh
 
export HBASE_MANAGES_ZK=false
export JAVA_HOME=/home/hadoop/jdk1.7.0_55
 
--复制HBase目录到其他服务器
--在hadoop上以hadoop用户,使用以下命令进行复制:
scp -r hbase-0.94.17 hadoop@hadoop1:~
scp -r hbase-0.94.17 hadoop@hadoop2:~
 
--启动,验证
拷贝完成后就可以输入:start-hbase.sh启动HBase集群了;启动完成后,hadoop上使用jps命令可以看到多了一个HMaster服务,在子节点输入jps可以看到多了一个HRegionServer服务; 登录HBase可以使用hbase shell命令登录HBase,输入status查看当前状态。输入exit退出HBase服务。
start-hbase.sh
 
starting master, logging to /home/hadoop/hbase-0.94.17/logs/hbase-hadoop-master-hadoop.out
192.168.92.18: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop1.out
192.168.92.19: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop2.out
192.168.92.17: starting regionserver, logging to /home/hadoop/hbase-0.94.17/bin/../logs/hbase-hadoop-regionserver-hadoop.out
 
jps
 
4870 SecondaryNameNode
4625 NameNode
6013 HMaster
4746 DataNode
5102 TaskTracker
6377 Jps
5744 QuorumPeerMain
4971 JobTracker
6171 HRegionServer
 
hbase shell
 
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 0.94.17, r1569509, Tue Feb 18 22:25:31 UTC 2014
 
status
 
3 servers, 0 dead, 0.6667 average load
 
--退出hbase
exit
 
--通过浏览器查看:在浏览器中输入
--Master: hadoop
http://192.168.92.17:60010
 
--启动集群
start-all.sh
--所有节点执行zkServer.sh start
zkServer.sh start
--查询zkServer进程状态
zkServer.sh status
start-hbase.sh
--停止集群
stop-hbase.sh
zkServer.sh stop
stop-all.sh
 
---------------------------------------------
--hive客户端script操作的网页页面 对应服务为RunJar
http://192.168.92.17:9999/hwi
--整合 hive-0.9.0 hbase-0.94.17
tar -xzvf hive-0.9.0.tar.gz
mkdir hive-config
cd conf
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-default.xml
cp hive-default.xml.template hive-site.xml
cp hive-exec-log4j.properties.template hive-exec-log4j.properties
cp hive-log4j.properties.template hive-log4j.properties
 
--在hive-log4j.properties中将log4j.appender.EventCounter的值修改为org.apache.hadoop.log.metrics.EventCounter,这样就不会报WARNING: org.apache.hadoop.metrics.jvm.EventCounter is deprecated. Please use org.apache.hadoop.log.metrics.EventCounter in all the log4j.properties files.的警告了。
vi hive-log4j.properties 修改如下
 
log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
 
--修改用户的环境变量,加入以下内容。
vi  ~/.bash_profile
 
export HIVE_HOME=/home/hadoop/hive-0.9.0
export PATH=$PATH:$HIVE_HOME/bin
 
source ~/.bash_profile
 
--退出hadoop用户重新登录,执行hive命令
hive
 
Logging initialized using configuration in file:/hadoop/hive-config/hive-log4j.properties
Hive history file=/tmp/hadoop/hive_job_log_hadoop_201209171124_1209357583.txt
 
hive> show tables;
OK
Time taken: 4.222 seconds
 
--示例 hbase测试
hadoop fs -lsr /
--表名test 键值名rowkey 字段属性info
create 'test','rowkey','info';
list
put 'test','rowkey1','info:name','zhangsan'
put 'test','rowkey2','info:address',''
scan 'test'
--更新键值rowkey2对应的记录
put 'test','rowkey2','info:address','shanghai'
--查询'test'表中的数据
scan 'test'
exit
 
--可以配置hive页面等
vi hive-0.9.0/conf/hive-env.sh 添加
export HIVE_HOME=/home/hadoop/hive-0.9.0
export PATH=$HIVE_HOME/bin:$PATH
JAVA_HOME=/home/hadoop/jdk1.7.0_55
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=.:$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
 
--编辑hive-site.xml添加
--hive-hwi-0.9.0.war为hive的页面对应的包
--无,file:///hadoop/hbase/lib/protobuf-java-2.4.0a.jar
--自动创建/home/hadoop/hive-0.9.0/logs
vi hive-site.xml
 
<property>
   <name>hbase.zookeeper.quorum</name>
   <value>hadoop,hadoop1,hadoop2</value>
</property>
<property>
  <name>hive.aux.jars.path</name>
  <value>file:///hadoop/hive/lib/hive-hbase-handler-0.9.0.jar,file:///hadoop/hive/lib/zookeeper-3.4.3.jar,file:///hadoop/hive/lib/hbase-0.92.0.jar</value>
</property>
<property>
  <name>hive.querylog.location</name>
  <value>/home/hadoop/hive-0.9.0/logs</value>
</property>
 
--当多用户登录时需安装mysql修改如下参数
--javax.jdo.option.ConnectionURL(当出现字符集问题时在此指定字符集)
<property>
  <name>javax.jdo.option.ConnectionURL</name>
   <value>jdbc:mysql://hadoop:3306/hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
   <value>com.mysql.jdbc.Driver</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>hive</value>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>oracle</value>
</property>
 
--启动hive的web界面 如何停止???
sh $HIVE_HOME/bin/hive --service hwi &
--hive连接hbase测试
hive
--外部表不能load data数据
CREATE TABLE hbase_table_1(key int, value string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val") TBLPROPERTIES ("hbase.table.name" = "xyz");
select * from hbase_table_1;
--查询表结构
desc hbase_table_1;
quit;
--转到hbase
--查询与hive的表hbase_table_1对应的hbase中的表xyz
hbase shell
list
scan 'xyz'
 
--创建本地表
--准备测试数据
cd /home/hadoop/data_hive
vi test.txt
1       'a1'
2       'a2'
 
CREATE TABLE hbase_table_2(key int, value string) ROW FORMAT DELIMITED fields TERMINATED BY '\t' STORED AS TEXTFILE;
--load data到表hbase_table_2
load data local inpath '/home/hadoop/data_hive/test.txt' into table hbase_table_2;
 
--注意有可能jar包版本不一致需要在hbase和hadoop拷贝对应版本
--mysql安装
rpm -ivh mysql-community-common-5.7.18-1.el5.x86_64.rpm --nodeps --force
rpm -ivh mysql-community-libs-5.7.18-1.el5.x86_64.rpm
rpm -ivh mysql-community-client-5.7.18-1.el5.x86_64.rpm
rpm -ivh mysql-community-server-5.7.18-1.el5.x86_64.rpm
 
--mysql初始化 默认路径和指定路径2个库都进行文件初始化 默认路径初始化要求的密码复杂度高
mkdir -pv /usr/local/mysql/data
 
--初始化过程无任何提示
mysqld --initialize --user=mysql --datadir=/usr/local/mysql/data --innodb_undo_tablespaces=3 --explicit_defaults_for_timestamp
 
--------------------------
vi /etc/my.cnf 修改为
 
--注释datadir
#datadir=/var/lib/mysql
 
--增加以下
datadir=/usr/local/mysql/data
 
[mysql.server]
user=mysql
basedir=/usr/local/mysql
 
[client]
socket=/var/lib/mysql/mysql.sock
 
--------------------------
--开启mysqld服务 数据文件路径设置问题
service mysqld start
--端口是否打开
lsof -i:3306
--mysqld服务是否正在运行
service mysqld status
 
--查询mysql初始化密码(自定义路径的库) password:t-HsdeRt?8d0
grep 'temporary password' /var/log/mysqld.log
--cat /root/.mysql_secret
 
--设置密码 进入mysql
mysql -uroot -p
--输入上一步查询出的初始化密码
SET PASSWORD = PASSWORD('oracle');
--或
alter user root@localhost identified by 'oracle';
flush privileges;
 
--mysql创建数据库hive
create database hive;
--创建用户
grant all on *.* to hadoop@'%' identified by 'oracle';
--grant ALL ON *.* to 'hadoop'@'192.168.0.0/255.255.0.0' identified by 'oracle';
flush privileges;
--创建测试数据库 指定字符集和编码
show databases;
--创建hive数据库
create database hive;
 
---------------------------------------------------------------------
--整合mysql到hive后 load data报错 但数据可以load成功
load data local inpath '/home/hadoop/data_hive/test.txt' into table test;
Copying data from file:/home/hadoop/data_hive/test.txt
Copying file: file:/home/hadoop/data_hive/test.txt
Loading data to table default.test
Failed with exception Iteration request failed : SELECT `A0`.`BUCKET_COL_NAME`,`A0`.`INTEGER_IDX` AS NUCORDER0 FROM `BUCKETING_COLS` `A0` WHERE `A0`.`SD_ID` = ? AND `A0`.`INTEGER_IDX` >= 0 ORDER BY NUCORDER0
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask
 
--去掉hadoop的安全模式
--确认hbase中conf配置文件hbase-site.xml中的hbase.rootdir值(例如 hdfs://master:54310/hbase)的主机名和端口 与 hadoop中配置文件core-site.xml中的fs.default.name值(例如 hdfs://master:54310)一致
--重新执行Hadoop和Hbase进程时,要kill掉当前的HBase,Hadoop进程
hadoop dfsadmin -safemode leave
 
常见错误
error 1:
---------------------
hive> show tables;
FAILED: Error in metadata: javax.jdo.JDOFatalInternalException: Error creating transactional connection factory
 
Solution:
--下载mysql-connector-java-5.1.18.tar.gz Hive不带mysql JDBC驱动
--wget http://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-5.1.18.tar.gz/from/http://mysql.mirror.kangaroot.net/
tar zxf mysql-connector-java-5.1.18.tar.gz
cd mysql-connector-java-5.1.18
cp mysql-connector*.jar $HIVE_HOME/lib
 
error 2:
----------------------
hive> show tables;
FAILED: Error in metadata: javax.jdo.JDOException: Couldnt obtain a new sequence (unique id) : Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-based logging. InnoDB is limited to row-logging when transaction isolation level is READ COMMITTED or READ UNCOMMITTED.
 
Solution:
在mysql中设置 binlog_format='MIXED'
mysql> SET SESSION binlog_format = 'MIXED';
 
posted @ 2021-01-05 21:47  virtual_daemon  阅读(85)  评论(0)  编辑  收藏  举报