Big Data Components: Installation, Configuration, and Script Reference
Virtual Machine Preparation
- First-time CentOS 7 installation and configuration: https://blog.csdn.net/weixin_37680513/article/details/105309496
- Configuring a cloned VM
## As root, change the IP address and the hostname
vi /etc/sysconfig/network-scripts/ifcfg-ens33
## Change the hostname
hostnamectl set-hostname xxx
## Restart the network service
systemctl restart network
- Note: make sure the hostname mapping file and the user's root privileges were already configured on the source host before cloning; otherwise, edit the hostname mapping in /etc/hosts and grant the user root privileges in /etc/sudoers.
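For reference, a minimal sketch of those two files; the IP addresses and the user name are placeholders, while the hostnames follow the localhost102-104 naming used throughout this post:
## /etc/hosts (hostname mapping, identical on every node)
192.168.1.102 localhost102
192.168.1.103 localhost103
192.168.1.104 localhost104
## /etc/sudoers (grant the working user root privileges; the user name is a placeholder)
bingmous    ALL=(ALL)       NOPASSWD:ALL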
Hadoop Cluster Configuration
https://blog.csdn.net/weixin_37680513/article/details/107740252
ZooKeeper Cluster Configuration
- Installation and configuration
## Create a zkData directory and a myid file inside it; set myid on every node so it matches the cluster configuration below
## Edit conf/zoo.cfg
dataDir=/opt/module/zookeeper-3.4.10/zkData
# Cluster configuration: server.<myid>=<hostname>:<data exchange port>:<leader election port>
#######################cluster##########################
server.2=localhost102:2888:3888
server.3=localhost103:2888:3888
server.4=localhost104:2888:3888
## Remember to distribute the installation to the other nodes, then change myid on each of them (see the sketch below)
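A sketch of creating zkData and myid on every node, assuming passwordless ssh and that the ZooKeeper directory has already been copied to each host (e.g. with the distribution script at the end of this post); the myid values 2/3/4 match the server.X entries above:
for i in 2 3 4
do
ssh localhost10$i "mkdir -p /opt/module/zookeeper-3.4.10/zkData && echo $i > /opt/module/zookeeper-3.4.10/zkData/myid"
done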
- Start/stop and common commands
bin/zkServer.sh start
bin/zkServer.sh stop
bin/zkServer.sh status # check status
bin/zkCli.sh # start the client
quit # exit the client
## Common client commands
ls create get delete set rmr
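A quick sanity check in the client (the znode name is just an example):
create /test "hello"
ls /
get /test
set /test "world"
delete /test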
- Cluster start/stop script zk.sh
#!/bin/bash
case $1 in
"start"){
echo ------------- starting ZooKeeper cluster ---------------
for i in localhost102 localhost103 localhost104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh start"
done
};;
"stop"){
echo ------------- stopping ZooKeeper cluster ---------------
for i in localhost102 localhost103 localhost104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh stop"
done
};;
"status"){
echo ------------- checking ZooKeeper cluster status ---------------
for i in localhost102 localhost103 localhost104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh status"
done
};;
esac
Flume Installation
- Installation and configuration
## Set JAVA_HOME to its absolute path in conf/flume-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
## Put custom interceptor jars into the lib directory
## Add agent .conf configuration files under conf, or create a separate jobs directory for them (a sketch follows)
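A minimal sketch of what such a .conf file could look like, assuming a TAILDIR source feeding a Kafka channel; the paths, topic name, and agent name a1 are placeholders, and the actual file-flume-kafka.conf used below may differ:
# source: tail log files and remember read positions
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /opt/module/flume/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /tmp/logs/app.+
a1.sources.r1.channels = c1
# channel: write events straight into a Kafka topic (no sink needed)
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = localhost102:9092,localhost103:9092,localhost104:9092
a1.channels.c1.kafka.topic = topic_log
a1.channels.c1.parseAsFlumeEvent = false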
- Start and stop
bin/flume-ng agent -c conf -f conf/file-flume-kafka.conf -n a1 -Dflume.root.logger=INFO,LOGFILE
bin/flume-ng agent -c conf -f conf/file-flume-kafka.conf -n a1 -Dflume.root.logger=INFO,console
ps -ef | grep file-flume-kafka | grep -v grep |awk '{print $2}' | xargs kill
- Script f1.sh
#!/bin/bash
case $1 in
"start"){
for i in localhost102 localhost103
do
echo " --------启动 $i 采集 flume-------"
ssh $i "nohup /opt/module/flume/bin/flume-ng agent --conf-file /opt/module/flume/conf/file-flume-kafka.conf --name a1 -Dflume.root.logger=INFO,LOGFILE >/opt/module/flume/test1 2>&1 &"
done
};;
"stop"){
for i in localhost102 localhost103
do
echo " --------停止 $i 采集 flume-------"
ssh $i "ps -ef | grep file-flume-kafka | grep -v grep |awk '{print \$2}' | xargs kill"
done
};;
esac
Kafka Cluster Configuration
- Installation and configuration
## Edit config/server.properties
broker.id=102 # must be unique for each broker
delete.topic.enable=true # allow topics to be deleted
log.dirs=/opt/module/kafka/data # where Kafka stores its log data; consumer offsets live in an internal topic (50 partitions by default) under this directory as well
zookeeper.connect=localhost102:2181,localhost103:2181,localhost104:2181 # ZooKeeper cluster to connect to
- Start and stop
bin/kafka-server-start.sh -daemon config/server.properties
bin/kafka-server-stop.sh
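The benchmarks below use a topic named test; a sketch of creating it first (partition and replication counts are just examples):
bin/kafka-topics.sh --zookeeper localhost102:2181 --create --topic test --partitions 3 --replication-factor 2
bin/kafka-topics.sh --zookeeper localhost102:2181 --list # verify it exists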
- Producer and consumer benchmarking
## Producer benchmark
bin/kafka-producer-perf-test.sh --topic test --record-size 100 --num-records 100000 --throughput -1 --producer-props bootstrap.servers=localhost102:9092,localhost103:9092,localhost104:9092
record-size is the size of each message in bytes.
num-records is the total number of messages to send.
throughput is the number of messages per second; -1 means no throttling, so the test measures the producer's maximum throughput.
## Consumer benchmark
bin/kafka-consumer-perf-test.sh --zookeeper localhost102:2181 --topic test --fetch-size 10000 --messages 10000000 --threads 1
- Start/stop script kk.sh
#! /bin/bash
case $1 in
"start"){
for i in localhost102 localhost103 localhost104
do
echo " --------启动 $i Kafka-------"
ssh $i "/opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties "
done
};;
"stop"){
for i in localhost102 localhost103 localhost104
do
echo " --------停止 $i Kafka-------"
ssh $i "/opt/module/kafka/bin/kafka-server-stop.sh stop"
done
};;
esac
- Collection pipeline start/stop script
#! /bin/bash
case $1 in
"start"){
echo " --------------------- 启动 集群 -------------------"
echo " ------------------- 启动 hadoop 集群 ----------------------"
/opt/module/hadoop-2.7.2/sbin/start-dfs.sh
ssh localhost103 "/opt/module/hadoop-2.7.2/sbin/start-yarn.sh"
#启动 Zookeeper 集群
zk.sh start
sleep 4s;
#启动 Flume 采集集群
f1.sh start
#启动 Kafka 采集集群
kk.sh start
sleep 6s;
#启动 Flume 消费集群
f2.sh start
};;
"stop"){
echo " -------------------- 停止 集群 ------------------------"
#停止 Flume 消费集群
f2.sh stop
#停止 Kafka 采集集群
kk.sh stop
sleep 6s;
#停止 Flume 采集集群
f1.sh stop
#停止 Zookeeper 集群
zk.sh stop
echo " ---------------------- 停止 hadoop 集群 --------------------"
ssh localhost103 "/opt/module/hadoop-2.7.2/sbin/stop-yarn.sh"
/opt/module/hadoop-2.7.2/sbin/stop-dfs.sh
};;
esac
MySQL Installation
- Download from https://dev.mysql.com/downloads/: server, client, and the Java connector; CDH additionally needs shared-compat
# make sure the MySQL installation files belong to root
chmod 777 -R xxx
chown root:root -R xxx
# first remove the bundled MariaDB
rpm -qa | grep mariadb
rpm -e --nodeps mariadb-libs-5.5.64-1.el7.x86_64
# or: rpm -qa | grep mariadb | xargs rpm -e --nodeps
# find and remove any old MySQL installation
find / -name mysql | xargs rm -rf
# or
rpm -qa | grep mysql | xargs rpm -e --nodeps
yum -y install autoconf
# once the old installation is removed cleanly, install the server
rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
# read the randomly generated root password
cat /root/.mysql_secret
# check status and start the service
systemctl status mysql
systemctl start mysql
# install the client
rpm -ivh MySQL-client-5.6.24-1.el6.x86_64.rpm
# log in with the client
mysql -uroot -p<random password>
# set a new password
set password=password('999999');
# or set it with mysql_secure_installation
# allow root to log in from any host
show databases;
use mysql;
show tables;
select user, host, password from user;
update user set host='%' where host="localhost";
delete from user where host="127.0.0.1";
delete from user where host="::1";
delete from user where host="localhost102";
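# apply the changes to the grant tables (alternatively, restart the MySQL service)
flush privileges;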
# from now on, log in like this
mysql -uroot -p<password>
Sqoop Installation
## Edit conf/sqoop-env.sh and add the following paths
export HADOOP_COMMON_HOME=/opt/module/hadoop-2.7.2
export HADOOP_MAPRED_HOME=/opt/module/hadoop-2.7.2
export HIVE_HOME=/opt/module/hive
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.4.10
export ZOOCFGDIR=/opt/module/zookeeper-3.4.10/conf
export HBASE_HOME=/opt/module/hbase
## Copy the MySQL JDBC driver jar into sqoop/lib
cp mysql-connector-java-5.1.27-bin.jar /opt/module/sqoop/lib/
## Verify the installation
bin/sqoop help
## Verify that Sqoop can connect to the database
bin/sqoop list-databases --connect jdbc:mysql://localhost102:3306/ --username root --password 999999
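A minimal import sketch to confirm Sqoop works end to end; the database name test, table student, and target directory are assumptions:
bin/sqoop import --connect jdbc:mysql://localhost102:3306/test --username root --password 999999 --table student --target-dir /sqoop/student --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'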
Hive 2.3 Installation
## Install MySQL first (see above)
## Configure hive-env.sh
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/module/hive/conf
## Copy the MySQL driver into hive/lib
cp mysql-connector-java-5.1.27-bin.jar /opt/module/hive/lib/
## Create hive-site.xml under conf/ with the following content
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost102:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>999999</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
</configuration>
## Start the Hive CLI
bin/hive
## Exit
quit;
## Metastore service: if the property below is not configured, an embedded metastore is used by default; if it is configured, the metastore service must be started manually
<property>
<name>hive.metastore.uris</name>
<value>thrift://localhost102:9083</value>
</property>
# start the metastore service
nohup bin/hive --service metastore >/dev/null 2>&1 &
Hive with the Tez Engine
## Upload the Tez tarball to /tez on HDFS
hadoop fs -mkdir /tez
hadoop fs -put apache-tez-0.9.1-bin.tar.gz /tez
## Extract it locally
tar -xvzf apache-tez-0.9.1-bin.tar.gz -C /opt/module/
mv /opt/module/apache-tez-0.9.1-bin/ /opt/module/tez-0.9.1
## Create tez-site.xml under hive/conf
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>tez.lib.uris</name>
<value>${fs.defaultFS}/tez/apache-tez-0.9.1-bin.tar.gz</value>
</property>
<property>
<name>tez.use.cluster.hadoop-libs</name>
<value>true</value>
</property>
<property>
<name>tez.history.logging.service.class</name>
<value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
</property>
</configuration>
## In hive-env.sh, set the environment variables and the dependency jar paths
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/module/hive/conf
export TEZ_HOME=/opt/module/tez-0.9.1 # the directory Tez was extracted to
export TEZ_JARS=""
for jar in `ls $TEZ_HOME |grep jar`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done
export HIVE_AUX_JARS_PATH=/opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar$TEZ_JARS
## Set the Hive execution engine in hive-site.xml
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
## Tez may use more memory than YARN allows and get its containers killed by the NodeManager; disable the virtual-memory check in the Hadoop cluster's yarn-site.xml
# remember to distribute this change to the other nodes
# then restart YARN
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
## Test Tez from the bin/hive client
create table t1(id int);
insert into t1 values (1001), (1002);
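## a simple aggregation to confirm the job actually runs on Tez (the console output should show a Tez DAG rather than MapReduce stages)
select count(*) from t1;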
Azkaban Installation
- Installation and configuration
## Required files
azkaban-web-server-2.5.0.tar.gz
azkaban-executor-server-2.5.0.tar.gz
azkaban-sql-script-2.5.0.tar.gz
mysql-libs.zip
## Extract them into /opt/module/azkaban and rename the directories to server and executor
## Log in to MySQL and create the azkaban database
mysql -uroot -p999999
create database azkaban;
use azkaban;
source /opt/module/azkaban/azkaban-2.5.0/create-all-sql-2.5.0.sql
## Generate the keystore; using the same password for the keystore and the key makes it easier to remember
keytool -keystore keystore -alias jetty -genkey -keyalg RSA
## Move it into the server directory
mv keystore /opt/module/azkaban/server/
## Time synchronization
tzselect
cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime # override the timezone
sudo date -s '2018-10-18 16:39:30' # run on every node
sudo ntpdate -u ntp.api.bz # or update the time from an NTP server
## Edit server/conf/azkaban.properties
# directory where the web server's static resources live
web.resource.dir=/opt/module/azkaban/server/web/
# default timezone, changed to Asia/Shanghai (the default is a US timezone)
default.timezone.id=Asia/Shanghai
# user management file (absolute path)
user.manager.xml.file=/opt/module/azkaban/server/conf/azkaban-users.xml
# location of the global properties file (absolute path)
executor.global.properties=/opt/module/azkaban/executor/conf/global.properties
# database configuration
mysql.host=localhost102
mysql.database=azkaban
mysql.user=root
mysql.password=999999
# keystore configuration
jetty.keystore=/opt/module/azkaban/server/keystore
jetty.password=999999
jetty.keypassword=999999
jetty.truststore=/opt/module/azkaban/server/keystore
jetty.trustpassword=999999
## Edit server/conf/azkaban-users.xml and add an account
<user username="admin" password="admin" roles="admin,metrics"/>
## Edit executor/conf/azkaban.properties
default.timezone.id=Asia/Shanghai
executor.global.properties=/opt/module/azkaban/executor/conf/global.properties
# database configuration
mysql.host=localhost102
mysql.database=azkaban
mysql.user=root
mysql.password=999999
- Start and stop
## Run these under the executor directory
bin/azkaban-executor-start.sh
bin/azkaban-executor-shutdown.sh
## Run these under the server directory
bin/azkaban-web-start.sh
bin/azkaban-web-shutdown.sh
## Web UI: https://localhost102:8443/
## Note: start the executor first and then the web server, otherwise the web server may fail to start because it cannot find an executor
- Usage
## Write .job files, package them into a .zip, and upload it through the web UI
## Run a command
type=command
command=xxxx
## Run a script
type=command
command=sh xxxx  # or /bin/bash xxxx; the script is passed as an argument to the shell
## Run a Java program
type=javaprocess
java.class=<fully qualified class name>
classpath=/xxx/xxx/xxx.jar  # or package the jar together with the .job files and use ./xxx.jar
## For jobs that depend on other jobs, add the following (see the example at the end of this section)
dependencies=xxx
## HDFS task; note: the command runs on the node where the executor is running
type=command
command=/opt/module/hadoop-2.7.2/bin/hadoop fs -mkdir /azkaban
## MapReduce task; note: the command runs on the executor node
type=command
command=/opt/module/hadoop-2.7.2/bin/hadoop jar
/opt/module/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /wordcount/input /wordcount/output
## Hive script task; note: the command runs on the executor node, so Hive must be installed on that node
type=command
command=/opt/module/hive/bin/hive -f /opt/module/azkaban/jobs/student.sql
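A sketch of two dependent jobs packaged together, illustrating the dependencies setting above (file names and commands are just examples):
## first.job
type=command
command=echo "first"
## second.job, runs only after first.job succeeds
type=command
dependencies=first
command=echo "second"
## zip both files and upload: zip jobs.zip first.job second.job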
Common Scripts
- Cluster file distribution script
#!/bin/bash
## 1. Get the number of arguments; exit immediately if there are none
pcount=$#
if((pcount==0));then
echo no args;
exit;
fi
## 2. Get the file name
p1=$1
fname=`basename $p1`
echo fname=$fname
## 3. Resolve the parent directory to an absolute path
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir
## 4. Get the current user name
user=`whoami`
## 5. Loop over the target hosts and sync
for((host=103; host < 105; host++)); do
echo -------------------- localhost$host --------------------
rsync -rvl $pdir/$fname $user@localhost$host:$pdir
done
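Example usage, assuming the script is saved as xsync, made executable, and placed on the PATH:
xsync /opt/module/zookeeper-3.4.10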
- Script to run a command on every node
#!/bin/bash
for i in localhost102 localhost103 localhost104
do
if [[ -z $1 ]]; then
echo ---------$i ----------
ssh $i jps
else
echo ---------$i ----------
ssh $i $*
fi
done
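Example usage, assuming the script is saved as xcall.sh:
xcall.sh                    # no arguments: run jps on every node
xcall.sh "ls /opt/module"   # run an arbitrary command on every node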
---
This post is from cnblogs, author: Bingmous. When reposting, please credit the original link: https://www.cnblogs.com/bingmous/p/15643700.html