Real-Time Data Warehouse (Continuously Updated)
Image Server Cleanup
For the whole machine
rm -rf /tmp/*
rm -rf /usr/tmp/*
rm -rf /var/log/*
rm -rf /var/run/log/*
rm -rf /root/*
rm -rf /paimon
For Dinky
rm -rf /opt/service/dinky-release-1.17-1.0.3/logs/*
rm -rf /opt/service/dinky-release-1.17-1.0.3/tmp/*
For Hadoop
rm -rf /opt/service/hadoop-3.2.4/data/*
rm -rf /opt/service/hadoop-3.2.4/logs/*
rm -rf /opt/service/hadoop-3.2.4/tmp/*
For Kafka
rm -rf /opt/service/kafka_2.12-3.0.0/data/*
rm -rf /opt/service/kafka_2.12-3.0.0/logs/*
For ZooKeeper
rm -rf /opt/service/zookeeper-3.5.10/data/zkdata/*
rm -rf /opt/service/zookeeper-3.5.10/data/zkdatalog/*
rm -rf /opt/service/zookeeper-3.5.10/logs/*
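To run the cleanup on all three nodes in one pass, a minimal sketch (assuming the passwordless SSH configured in the SSH section below and identical paths on every host; extend the rm list with the per-component paths above as needed):
for host in bigdata01 bigdata02 bigdata03
do
ssh "$host" 'rm -rf /tmp/* /usr/tmp/*'
done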
JDK (base component)
https://www.oracle.com/java/technologies/downloads/archive/
After unpacking, configure `/etc/profile`:
#JAVA
export JAVA_HOME=/opt/service/jdk1.8.0_401
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export JAVA_PATH=${JAVA_HOME}/bin:${JRE_HOME}/bin
export PATH=$PATH:${JAVA_PATH}
Apply the changes:
source /etc/profile
Check the Java version:
java -version
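If PATH is set correctly, the output reports the unpacked version; roughly the following (the exact build suffix may differ):
java version "1.8.0_401"
Java(TM) SE Runtime Environment (build 1.8.0_401-b10)
Java HotSpot(TM) 64-Bit Server VM (build 25.401-b10, mixed mode)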
MySQL
Install script
#!/bin/bash
set -x
function Install_Mysql(){
while :
do
read -p "Do you need to install MySQL(yes/no): " my_result
if [ "$my_result" == "no" ];then
which mysql >/dev/null 2>&1
if [ "$?" != "0" ];then
echo "MySQL client is not installed on this machine. Start to install now"
cd $dir
wget -O "mysql5.7.tar.gz" https://s3-gzpu.didistatic.com/pub/mysql5.7.tar.gz
mkdir -p $dir/mysql/ && cd $dir/mysql/
tar -zxf $dir/mysql5.7.tar.gz -C $dir/mysql/
rpm -ivh $dir/mysql/mysql-community-common-5.7.36-1.el7.x86_64.rpm
rpm -ivh $dir/mysql/mysql-community-libs-5.7.36-1.el7.x86_64.rpm
rpm -ivh $dir/mysql/mysql-community-client-5.7.36-1.el7.x86_64.rpm
fi
read -p "Please enter the MySQL service address: " mysql_ip
read -p "Please enter MySQL service port(default is 3306): " mysql_port
read -p "Please enter the root password of MySQL service: " mysql_pass
if [ "$mysql_port" == "" ];then
mysql_port=3306
fi
break
elif [ "$my_result" == "yes" ];then
read -p "Installing MySQL service will uninstall the installed(if any), Do you want to continue(yes/no): " option
if [ "$option" == "yes" ];then
cd $dir
wget -O "mysql5.7.tar.gz" https://s3-gzpu.didistatic.com/pub/mysql5.7.tar.gz
rpm -qa | grep -w -E "mariadb|mysql" | xargs yum -y remove >/dev/null 2>&1
mv -f /var/lib/mysql/ /var/lib/mysqlbak$(date "+%s") >/dev/null 2>&1
mkdir -p $dir/mysql/ && cd $dir/mysql/
tar -zxf $dir/mysql5.7.tar.gz -C $dir/mysql/
yum -y localinstall mysql* libaio*
systemctl start mysqld
systemctl enable mysqld >/dev/null 2>&1
old_pass=`grep 'temporary password' /var/log/mysqld.log | awk '{print $NF}' | tail -n 1`
mysql -NBe "alter user USER() identified by '$mysql_pass';" --connect-expired-password -uroot -p$old_pass
if [ $? -eq 0 ];then
mysql_ip="127.0.0.1"
mysql_port="3306"
echo "Mysql database installation completed"
echo "-------- MySQL root password: $mysql_pass ---------"
else
echo -e "${RED} Mysql database configuration failed. The script exits ${RES}"
exit
fi
break
else
exit 1
fi
else
Printlog "Input error, please re-enter(yes/no)"
continue
fi
done
}
# Variable declarations
dir=`pwd`
RED='\E[1;31m'
RES='\E[0m'
# Invoke
Install_Mysql
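After the script finishes, connectivity can be verified; for example, for a local install (enter the root password set above when prompted):
mysql -h 127.0.0.1 -P 3306 -uroot -p -e "SELECT VERSION();"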
SSH (cluster passwordless login)
-- Reference
https://www.jianshu.com/p/b71c58a598b5
Port: 22
Modify the config file on all three nodes: `/etc/hosts`
echo "::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
192.168.99.215 bigdata01 iZ2ze3nalp8guto80cb08tZ
192.168.99.216 bigdata02 iZ2ze3nalp8guto80cb08sZ
192.168.99.214 bigdata03 iZ2ze3nalp8guto80cb08rZ" > /etc/hosts
Set up mutual passwordless login (all three nodes)
ssh-keygen -t rsa
# press Enter three times in a row
ssh-copy-id root@bigdata01
# enter the password
#Odds!@#123
ssh-copy-id root@bigdata02
# enter the password
ssh-copy-id root@bigdata03
# enter the password
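A quick check that passwordless login works (assuming the hosts file above is in place, each command prints the remote hostname without asking for a password):
for host in bigdata01 bigdata02 bigdata03; do ssh "$host" hostname; done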
ZooKeeper (cluster)
A distributed coordination service that helps distributed applications keep their data in sync.
-- Reference
https://www.cnblogs.com/maoxianwan/articles/17486380.html
Ports: 2181/2888/3888
After unpacking, configure `/etc/profile`:
#ZOOKEEPER
export ZOOKEEPER_HOME=/opt/service/zookeeper-3.5.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin
Apply the changes:
source /etc/profile
Modify the config file `$ZOOKEEPER_HOME/conf/zoo.cfg`:
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/opt/service/zookeeper-3.5.10/data/zkdata
dataLogDir=/opt/service/zookeeper-3.5.10/data/zkdatalog
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=bigdata01:2888:3888
server.2=bigdata02:2888:3888
server.3=bigdata03:2888:3888
In the dataDir directory, create the node-ID file myid; its content must match the number after `server.` for that host, as shown below.
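For example, matching the server.N lines above (run the corresponding command on each host; paths follow the dataDir configured above):
echo 1 > /opt/service/zookeeper-3.5.10/data/zkdata/myid # on bigdata01
echo 2 > /opt/service/zookeeper-3.5.10/data/zkdata/myid # on bigdata02
echo 3 > /opt/service/zookeeper-3.5.10/data/zkdata/myid # on bigdata03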
Custom cluster start/stop script
#!/bin/bash
for host in bigdata01 bigdata02 bigdata03
do
case $1 in
"start"){
echo " "
echo "--------------- 启 动 zookeeper ---------------"
echo "------------ $host zookeeper -----------"
ssh $host "source /etc/profile; zkServer.sh start"
};;
"stop"){
echo " "
echo "--------------- 关 闭 zookeeper ---------------"
echo "------------ $host zookeeper -----------"
ssh $host "source /etc/profile; zkServer.sh stop"
};;
"status"){
echo " "
echo "-------------- 查看zookeeper状态 --------------"
echo "------------ $host zookeeper -----------"
ssh $host "source /etc/profile; zkServer.sh status"
};;
esac
done
`chmod +x /bash/zkCluster.sh`
`cp /bash/zkCluster.sh /bin/`
Test
`zkCluster.sh status`
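On a healthy three-node ensemble, one node reports leader and the other two follower (which node wins the election varies); trimmed output resembles:
------------ bigdata01 zookeeper -----------
Mode: follower
------------ bigdata02 zookeeper -----------
Mode: leader
------------ bigdata03 zookeeper -----------
Mode: follower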
Kafka (cluster)
-- Reference
https://blog.csdn.net/snipercai/article/details/131812772
Ports: 9092/9999
After unpacking, configure `/etc/profile`:
#KAFKA
export KAFKA_HOME=/opt/service/kafka_2.12-3.0.0
export PATH=$PATH:$KAFKA_HOME/bin
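The broker configuration itself is not included in this post; a minimal $KAFKA_HOME/config/server.properties sketch for this three-node layout (assumed values; broker.id must be unique per host, and the paths follow the cleanup section above):
broker.id=1
listeners=PLAINTEXT://bigdata01:9092
log.dirs=/opt/service/kafka_2.12-3.0.0/data
zookeeper.connect=bigdata01:2181,bigdata02:2181,bigdata03:2181
On bigdata02/bigdata03, set broker.id to 2/3 and adjust the listener hostname accordingly.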
Hadoop (cluster)
-- Reference
https://blog.csdn.net/snipercai/article/details/131812772
Ports: 9000/9870/8485/10020/19888/8088
After unpacking, configure `/etc/profile`:
#HADOOP
export HADOOP_HOME=/opt/service/hadoop-3.2.4
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Configuration files
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default filesystem; Hadoop supports file, HDFS, GFS, and others -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ccns</value>
</property>
<!-- Hadoop temporary directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/service/hadoop-3.2.4/tmp</value>
</property>
<!-- ZooKeeper quorum address -->
<property>
<name>ha.zookeeper.quorum</name>
<value>bigdata01:2181,bigdata02:2181,bigdata03:2181</value>
</property>
<!-- Local directory where Hadoop stores data -->
<property>
<name>hadoop.data.dir</name>
<value>/opt/service/hadoop-3.2.4/data</value>
</property>
<!-- Static user identity for the HDFS web UI -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<!-- Trash retention time (minutes) -->
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
<property>
<name>fs.alluxio.impl</name>
<value>alluxio.hadoop.FileSystem</value>
</property>
</configuration>
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- HDFS nameservice ID (ccns here); must match the name used in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>ccns</value>
</property>
<!-- NameNodes in the nameservice: this cluster has two, nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.ccns</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address and port of nn1, used to talk to DataNodes; default 9000 -->
<property>
<name>dfs.namenode.rpc-address.ccns.nn1</name>
<value>bigdata01:9000</value>
</property>
<!-- RPC address and port of nn2, used to talk to DataNodes; default 9000 -->
<property>
<name>dfs.namenode.rpc-address.ccns.nn2</name>
<value>bigdata02:9000</value>
</property>
<!-- HTTP address and port of nn1 (web UI) -->
<property>
<name>dfs.namenode.http-address.ccns.nn1</name>
<value>bigdata01:9870</value>
</property>
<!-- HTTP address and port of nn2 (web UI) -->
<property>
<name>dfs.namenode.http-address.ccns.nn2</name>
<value>bigdata02:9870</value>
</property>
<!-- Where NameNode edits are stored on the JournalNodes; the second NameNode reads from here to stay in hot standby -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://bigdata01:8485;bigdata02:8485;bigdata03:8485/ccns</value>
</property>
<!-- Failover proxy provider: the class clients use to connect to the active NameNode; default org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider -->
<property>
<name>dfs.client.failover.proxy.provider.ccns</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method, HDFS HA's split-brain protection. sshfence is recommended (optionally sshfence(user:port), e.g. hadoop:9922); it requires passwordless SSH between the two NameNodes. Fencing guarantees only one NameNode is active: if both claim to be active, the new one force-kills the old -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Private key file used for the sshfence SSH connection above -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Local directory where JournalNodes store their data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/service/hadoop-3.2.4/data/journalnode</value>
</property>
<!-- NameNode metadata directory; defaults to a path under hadoop.tmp.dir -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/service/hadoop-3.2.4/data/namenode</value>
</property>
<!-- DataNode block storage directory; defaults to a path under hadoop.tmp.dir -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///opt/service/hadoop-3.2.4/data/datanode</value>
</property>
<!-- Replication factor -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- Permission checking; false disables it so any user may operate -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster ID identifying this ResourceManager pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>ccyarn</value>
</property>
<!-- IDs of the ResourceManagers in the cluster, referenced by the settings below -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Node running YARN master role rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>bigdata01</value>
</property>
<!-- Node running YARN master role rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>bigdata02</value>
</property>
<!-- Web UI address of ResourceManager 1 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>bigdata01:8088</value>
</property>
<!-- Web UI address of ResourceManager 2 -->
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>bigdata02:8088</value>
</property>
<!-- ZooKeeper quorum -->
<property>
<name>hadoop.zk.address</name>
<value>bigdata01:2181,bigdata02:2181,bigdata03:2181</value>
</property>
<!-- Enable ResourceManager recovery after restart; default false -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Class used to store ResourceManager state -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- NodeManager restart recovery -->
<!-- The ShuffleHandler service can also restore its state after a NodeManager restart -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Whether to enforce physical memory limits on containers -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Whether to enforce virtual memory limits on containers -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Log aggregation server address -->
<property>
<name>yarn.log.server.url</name>
<value>http://bigdata01:19888/jobhistory/logs</value>
</property>
<!-- Log retention time (seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>172800</value>
</property>
<!-- Flink-related -->
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>4</value>
<description>
The maximum number of application master execution attempts.
</description>
</property>
<property>
<name>yarn.app.attempts</name>
<value>3</value>
</property>
<property>
<name>yarn.application.attempt.failures-validity-interval</name>
<value>5 minute</value>
</property>
<!-- Scheduler choice: CapacityScheduler by default; FairScheduler is used here -->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<!--
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value> -->
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>/opt/service/hadoop-3.2.4/etc/hadoop/fair-scheduler.xml</value>
<description>Path to the FairScheduler queue allocation file</description>
</property>
<!--
Whether to use the submitting user's name as the queue name when no queue is specified.
If true, a job submitted by user `yellow` automatically creates and uses queue `root.yellow`.
If false, all users default to the `root.default` queue.
Ignored when `yarn.scheduler.fair.allocation.file` is configured.
-->
<property>
<name>yarn.scheduler.fair.user-as-default-queue</name>
<value>false</value>
</property>
<!-- Whether to enable preemption between queues -->
<!-- <property>
<name>yarn.scheduler.fair.preemption</name>
<value>true</value>
</property> -->
<!-- Preemption trigger threshold: ratio of used resources to total capacity -->
<!-- <property>
<name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
<value>0.7f</value>
</property> -->
<!-- Maximum application priority -->
<!-- <property>
<name>yarn.cluster.max-application-priority</name>
<value>100</value>
</property> -->
<!-- Number of ResourceManager threads for handling scheduler requests; 3*4=12 cores in total, so leave a few for other uses -->
<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>5</value>
</property>
<!-- NodeManager-related -->
<!-- Whether YARN should auto-detect hardware for resource configuration -->
<property>
<name>yarn.nodemanager.resource.detect-hardware-capabilities</name>
<value>false</value>
</property>
<!-- Whether to count logical processors (hyper-threads) as cores; default false -->
<property>
<name>yarn.nodemanager.resource.count-logical-processors-as-cores</name>
<value>true</value>
</property>
<!-- Multiplier from physical cores to vcores -->
<property>
<name>yarn.nodemanager.resource.pcores-vcores-multiplier</name>
<value>16</value>
</property>
<!-- Memory available to the NodeManager; default 8 GB, set to 30 GB here -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>30720</value>
<description>Memory available on each node; default 8192 MB (8 GB)</description>
</property>
<!-- vcores available to the NodeManager; default 8 -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>96</value>
<description>Default 8. Number of vcores this node offers to YARN; usually set to the node's total defined vcores.</description>
</property>
<!-- Minimum container memory; default 1 GB -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
<description>Minimum memory a single task may request; default 1024 MB</description>
</property>
<!-- Maximum container memory; default 8 GB -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
<description>Maximum memory a single task may request; default 8192 MB (8 GB)</description>
</property>
<!-- Minimum container vcores; Hadoop default 1 -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
<description>Minimum/maximum vcores the RM may allocate per container; requests below or above the limits are clamped to the minimum or maximum. The defaults suit typical clusters.</description>
</property>
<!-- Maximum container vcores; default 4 -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>6</value>
<description>Minimum/maximum vcores the RM may allocate per container; requests below or above the limits are clamped to the minimum or maximum. The defaults suit typical clusters.</description>
</property>
<!-- Virtual-to-physical memory ratio; default 2.1 -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>1</value>
<description>Virtual memory usable when physical memory is short; the default 2.1 means every 1 MB of physical memory may back up to 2.1 MB of virtual memory.</description>
</property>
<property>
<name>yarn.nodemanager.vcores-pcores-ratio</name>
<value>16</value>
<description>Ratio of virtual CPUs usable per physical CPU; default 2</description>
</property>
<property>
<!-- Maximum resources to allocate to application masters
If this is too high application masters can crowd out actual work -->
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<description>Maximum fraction of resources that Application Masters (AMs) may use</description>
<value>0.01</value>
</property>
</configuration>
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default MR execution mode: yarn (cluster) or local -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>bigdata01:10020</value>
</property>
<!-- JobHistory web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>bigdata01:19888</value>
</property>
<!-- Map task environment variables -->
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- Reduce task environment variables -->
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>
fair-scheduler.xml
This file is configured in accordance with yarn-site.xml (it is the file named by yarn.scheduler.fair.allocation.file).
<?xml version="1.0"?>
<allocations>
<!-- Default cap on applications per user -->
<userMaxAppsDefault>100</userMaxAppsDefault>
<!-- Root queue -->
<queue name="root">
<!-- Minimum resources: 1024 MB memory, 1 vcore -->
<minResources>1024mb,1vcores</minResources>
<!-- Maximum resources: 153600 MB memory, 480 vcores -->
<maxResources>153600mb,480vcores</maxResources>
<!-- Maximum number of running applications -->
<maxRunningApps>200</maxRunningApps>
<!-- Weight -->
<weight>1.0</weight>
<!-- Scheduling mode: fair -->
<schedulingMode>fair</schedulingMode>
<!-- ACL (Access Control List) for submitting applications -->
<aclSubmitApps>*</aclSubmitApps>
<!-- ACL for administering applications -->
<aclAdministerApps>*</aclAdministerApps>
<!-- "default" child queue -->
<queue name="default">
<!-- Minimum resources: 1024 MB memory, 1 vcore -->
<minResources>1024mb,1vcores</minResources>
<!-- Maximum resources: 107520 MB memory, 300 vcores -->
<maxResources>107520mb,300vcores</maxResources>
<!-- Maximum number of running applications -->
<maxRunningApps>100</maxRunningApps>
<!-- Scheduling mode: fair -->
<schedulingMode>fair</schedulingMode>
<!-- Weight -->
<weight>1.0</weight>
<!-- Submit ACL: * lets anyone submit applications -->
<aclSubmitApps>*</aclSubmitApps>
<!-- maxAMResourcePerApp caps the resources available to AMs -->
<maxAMResourcePerApp>60000mb,150vcores</maxAMResourcePerApp>
</queue>
<!-- "taobao" child queue -->
<queue name="taobao">
<!-- Minimum resources: 1024 MB memory, 1 vcore -->
<minResources>1024mb,1vcores</minResources>
<!-- Maximum resources: 46080 MB memory, 180 vcores -->
<maxResources>46080mb,180vcores</maxResources>
<!-- Maximum number of running applications -->
<maxRunningApps>100</maxRunningApps>
<!-- Scheduling mode: fair -->
<schedulingMode>fair</schedulingMode>
<!-- Weight -->
<weight>1.0</weight>
<aclSubmitApps>*</aclSubmitApps>
<!-- maxAMResourcePerApp caps the resources available to AMs -->
<maxAMResourcePerApp>23040mb,180vcores</maxAMResourcePerApp>
</queue>
</queue>
</allocations>
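After editing the allocation file, queues can be reloaded without restarting the ResourceManager (the FairScheduler also re-reads this file periodically on its own):
yarn rmadmin -refreshQueues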
Apply the changes:
source /etc/profile
Modify the config files (see the servers for details)
Create the corresponding data directories:
mkdir /opt/service/hadoop-3.2.4/tmp
mkdir /opt/service/hadoop-3.2.4/data
mkdir /opt/service/hadoop-3.2.4/data/journalnode
mkdir /opt/service/hadoop-3.2.4/data/namenode
mkdir /opt/service/hadoop-3.2.4/data/datanode
Initialize the cluster
# run on bigdata01
hdfs zkfc -formatZK
### start a journalnode on every node ###
hdfs --daemon start journalnode
### format the primary namenode; bigdata01 is chosen as primary here ###
hdfs namenode -format
# start the primary namenode
hdfs --daemon start namenode
# bootstrap the standby namenode; bigdata02 is the standby here (run on bigdata02)
hdfs namenode -bootstrapStandby
# start all daemons
start-all.sh
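Once start-all.sh completes, HA state can be checked with the standard admin commands; one NameNode and one ResourceManager should report active and the others standby:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2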
- https://www.cnblogs.com/lenmom/p/11285273.html (on YARN schedulers; pick a concrete scheduler later as needed)
Cluster scale-out/scale-in (no downtime required)
https://blog.csdn.net/andy_wcl/article/details/104558092
By default the balancer keeps each node's storage utilization within 10% of the cluster-wide average; since this cluster's disks are small, a 5% threshold is recommended to avoid data-block skew.
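For example, a rebalance run with the recommended 5% threshold:
hdfs balancer -threshold 5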
YARN tuning
Suppose a server has 32 cores (64 after virtualization) and 128 GB of RAM; how should the six parameters below be set to maximize resource utilization? In production we generally reserve 15-20% of memory, so usable memory is 128 * 0.8 = 102.4 GB; after subtracting what other components use, 90 GB is enough. In other words, YARN's total available resources are 64 cores and 90 GB.
Maximum and minimum vcores per task
yarn.scheduler.maximum-allocation-vcores
Usually set to 4: Cloudera's performance testing found that CPU utilization degrades once a container uses 5 or more vcores. The physical-to-virtual ratio depends on the production servers: with plenty of headroom, set it 1:1 (i.e. 32 cores); otherwise 1:2. We calculate with 1:2 here.
yarn.scheduler.minimum-allocation-vcores
With vcores = 1, at most 64/1 = 64 containers can run; at the 4-vcore maximum, at least 64/4 = 16 containers.
Maximum and minimum memory per task
yarn.scheduler.minimum-allocation-mb
Default 1 GB. At 2 GB, at most 90/2 = 45 containers can run; at 4 GB, at most 22, which wastes some vcores.
yarn.scheduler.maximum-allocation-mb
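Putting the example numbers together, a sketch of the resulting settings for the hypothetical 64-core / 90 GB node above (not the values used on this cluster):
yarn.nodemanager.resource.cpu-vcores=64
yarn.nodemanager.resource.memory-mb=92160
yarn.scheduler.minimum-allocation-vcores=1
yarn.scheduler.maximum-allocation-vcores=4
yarn.scheduler.minimum-allocation-mb=2048
yarn.scheduler.maximum-allocation-mb=4096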
Hadoop log cleanup
Automatic cleanup is already configured; for a manual pass, run the following on all three machines:
rm -rf /opt/service/hadoop-3.2.4/logs/*.log.*
rm -rf /opt/service/hadoop-3.2.4/logs/*.out.*
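The automatic cleanup can be as simple as a daily crontab entry; a sketch (the actual schedule configured on these servers may differ):
0 3 * * * rm -rf /opt/service/hadoop-3.2.4/logs/*.log.* /opt/service/hadoop-3.2.4/logs/*.out.*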
Flink (on YARN)
-- Reference
https://blog.csdn.net/ASN_forever/article/details/106234893
https://blog.csdn.net/weixin_52134189/article/details/139332965
Ports: 6123/8081
With Flink on YARN, installing Flink on a single node is enough.
Modify the config file `yarn-site.xml`:
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>4</value>
<description>
The maximum number of application master execution attempts.
</description>
</property>
Modify the config file `flink-conf.yaml`:
# RPC address used by the JobManager, TaskManagers, and other clients; TaskManagers use it to reach the JobManager/ResourceManager. Not needed in HA mode: it is configured in the masters file, and ZooKeeper elects the leader and standby
jobmanager.rpc.address: 0.0.0.0
# RPC port used by the JobManager, TaskManagers, and other clients. Not needed in HA mode (see above)
jobmanager.rpc.port: 6123
# JobManager total process memory
jobmanager.memory.process.size: 1024m
# TaskManager total process memory
taskmanager.memory.process.size: 2048m
# number of task slots offered by each TaskManager
taskmanager.numberOfTaskSlots: 4
# default parallelism
parallelism.default: 4
# high-availability mode
high-availability.type: zookeeper
# JobManager metadata is kept in the filesystem storageDir; ZooKeeper stores only a pointer to it
high-availability.storageDir: hdfs:///dinky/flink/recovery_demo
# ZooKeeper quorum
high-availability.zookeeper.quorum: bigdata01:2181,bigdata02:2181,bigdata03:2181
# root path for Flink under ZooKeeper
high-availability.zookeeper.path.root: /flink
# restart attempts for a single Flink job; must not exceed the Application Master max attempts configured in yarn-site.xml
yarn.application-attempts: 4
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# jobmanager (MemoryStateBackend), filesystem (FsStateBackend), rocksdb (RocksDBStateBackend)
# interval between checkpoints: 180 s
execution.checkpointing.interval: 180000
# minimum pause between two checkpoints: 180 s
execution.checkpointing.min-pause: 180000
# exactly-once checkpointing mode
execution.checkpointing.mode: EXACTLY_ONCE
# state backend: filesystem
state.backend: filesystem
# checkpoint storage directory
state.checkpoints.dir: hdfs:///dinky/flink/checkpoint
# savepoint storage directory
state.savepoints.dir: hdfs:///dinky/flink/savepoint
# checkpoint timeout: 600000 ms (10 minutes)
execution.checkpointing.timeout: 600000
# at most 1 concurrent checkpoint
execution.checkpointing.max-concurrent-checkpoints: 1
# retain at most 2 completed checkpoints
state.checkpoints.num-retained: 2
# delete externalized checkpoints when the job is cancelled
execution.checkpointing.externalized-checkpoint-retention: DELETE_ON_CANCELLATION
# whether to create incremental checkpoints, storing only the diff from the previous checkpoint rather than the full state; some state backends do not support this and ignore the option
state.backend.incremental: false
# JobManager failover strategy: how the job recovers from task failures. full restarts all tasks; region restarts only the tasks that may be affected by the failure (as of Flink 1.11, region only takes effect for batch jobs; streaming still uses full)
jobmanager.execution.failover-strategy: region
# OSS-related
# fs.oss.endpoint: https://oss-cn-beijing-internal.aliyuncs.com
# fs.oss.accessKeyId: LTAI5tN3omJRJwcNyzUEo34v
# fs.oss.accessKeySecret: QgRfeoQ8TprkQPA0yknFvGcStTgQ4D
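With this configuration in place, a detached YARN session can be started from the Flink node (standard yarn-session.sh flags; FLINK_HOME is assumed to point at the unpacked Flink distribution, and the sizes mirror the settings above):
$FLINK_HOME/bin/yarn-session.sh -d -s 4 -jm 1024m -tm 2048m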
Dinky (job authoring entry point)
-- Reference
https://dinky.org.cn/docs/1.0/deploy_guide/normal_deploy
Port: 8888
Default credentials: admin/dinky123!@#
When jobs run on the cluster, the local jars are irrelevant; only the config files and the lib files in HDFS are needed.
Notes
For a single TaskManager/JobManager, the slot cap is 4; keep parallelism at or below 4, or configure multiples of 4. TaskManager/JobManager memory is capped at 4096m (default 1024m); adjust it in multiples of 1024m or keep the default. Setting parallelism to N times the slot count evenly increases the number of TaskManagers.
DolphinScheduler (TBD: scheduling & cluster monitoring)
-- Reference
Port: 12345
Default credentials: admin/dolphinscheduler123
Issue log
- Some jobs fail repeatedly and keep printing logs in YARN, which has brought worker nodes down; the plan is to disable some of this logging.
- Resources are misused (slot and parallelism settings): after a job is submitted, observe it promptly, set reasonable resources, then publish it and configure alerts.
V_2.0
- Recommendation: isolate resources and jobs by deploying with containers