Notes on Hadoop-series configuration files
Configuration file record
Tags (space-separated): configuration
hs2 (HiveServer2 Hive configuration):
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--><configuration>
<!-- WARNING!!! This file is auto generated for documentation purposes ONLY! -->
<!-- WARNING!!! Any changes you make to this file will be ignored by Hive. -->
<!-- WARNING!!! You must make your changes in hive-site.xml instead. -->
<!-- Hive Execution Parameters -->
<!-- Hive Metastore Settings Tuning Begin -->
<property>
<name>hive.metastore.server.max.threads</name>
<value>3000</value>
</property>
<property>
<name>hive.metastore.server.min.threads</name>
<value>50</value>
</property>
<property>
<name>datanucleus.connectionPool.maxPoolSize</name>
<value>50</value>
</property>
<!-- metastore - mysql-->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://rm-bp1c3c7j9l824i42x.mysql.rds.aliyuncs.com:3306/hive?createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
<description></description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description></description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description></description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>FEjZt2Bu@wBd#bR</value>
<description></description>
</property>
<!-- Hive Metastore Settings Tuning End -->
<!-- Hive HiveServer2 reload UDF -->
<property>
<name>hive.reloadable.aux.jars.path</name>
<value>/var/lib/hive/auxlib</value>
</property>
<!-- HDFS-specific tuning for our cluster only -->
<property>
<name>hive.insert.into.multilevel.dirs</name>
<value>true</value>
</property>
<property>
<name>hive.insert.into.external.tables</name>
<value>true</value>
</property>
<!-- HiveServer2 Settings Tuning -->
<property>
<name>mapred.max.split.size</name>
<value>128000000</value>
</property>
<property>
<name>mapred.min.split.size.per.node</name>
<value>128000000</value>
</property>
<property>
<name>mapred.min.split.size.per.rack</name>
<value>128000000</value>
</property>
<property>
<name>hive.input.format</name>
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://VECS01219:9083,thrift://VECS00047:9083</value>
<description>Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore.</description>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>/tmp/hive-staging</value>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>hdfs://flashHadoop/user/hive/warehouse</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/data1/data/hive</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
<description>
</description>
</property>
<property>
<name>hive.async.log.enabled</name>
<value>false</value>
<description>
</description>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
<description>
</description>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2</value>
<description>The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery.</description>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>VECS01118:2181,VECS01119:2181,VECS01120:2181,VECS01121:2181,VECS00047:2181</value>
<description>
</description>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
<description>
</description>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>VECS011126</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>true</value>
</property>
<property>
<name>hive.server2.session.check.interval</name>
<value>900000</value>
</property>
<property>
<name>hive.server2.idle.session.timeout</name>
<value>43200000</value>
</property>
<property>
<name>hive.server2.idle.session.timeout_check_operation</name>
<value>true</value>
</property>
<property>
<name>hive.server2.idle.operation.timeout</name>
<value>21600000</value>
</property>
<property>
<name>hive.server2.webui.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<property>
<name>hive.server2.webui.max.threads</name>
<value>50</value>
</property>
<property>
<name>hive.server2.webui.use.ssl</name>
<value>false</value>
</property>
<property>
<name>hive.server2.builtin.udf.blacklist</name>
<value>empty_blacklist</value>
</property>
<!-- Tez Tuning -->
<property>
<name>hive.server2.tez.session.lifetime.jitter</name>
<value>30m</value>
</property>
<property>
<name>hive.server2.thrift.min.worker.threads</name>
<value>150</value>
</property>
<property>
<name>hive.server2.thrift.max.worker.threads</name>
<value>1000</value>
</property>
<property>
<name>hive.default.fileformat</name>
<value>orc</value>
<description>
Expects one of [textfile, sequencefile, rcfile, orc].
Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]
</description>
</property>
<property>
<name>hive.default.fileformat.managed</name>
<value>orc</value>
<description>
Expects one of [none, textfile, sequencefile, rcfile, orc].
Default file format for CREATE TABLE statement applied to managed tables only. External tables will be
created with format specified by hive.default.fileformat. Leaving this null will result in using hive.default.fileformat
for all tables.
</description>
</property>
<!-- Enable HiveServer2 metrics -->
<property>
<name>hive.server2.metrics.enabled</name>
<value>true</value>
</property>
<property>
<name>hive.security.authorization.sqlstd.confwhitelist.append</name>
<value>hive.*|mapred.*|tez.*|queue.*</value>
</property>
<property>
<name>hive.security.authorization.sqlstd.confwhitelist</name>
<value>hive.*|mapred.*|tez.*|queue.*</value>
</property>
<property>
<name>hive.warehouse.subdir.inherit.perms</name>
<value>false</value>
</property>
<property>
<name>hive.exec.dynamic.partition</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.mapred.reduce.tasks.speculative.execution</name>
<value>false</value>
</property>
<property>
<name>hive.exec.schema.evolution</name>
<value>false</value>
</property>
</configuration>
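Since hive.server2.support.dynamic.service.discovery is enabled with the hiveserver2 namespace above, clients can locate HiveServer2 through the ZooKeeper quorum instead of a fixed host. A minimal connection sketch, assuming a Beeline client and that the connecting user (here hive) is valid; adjust credentials to the actual environment:
# Connect to HiveServer2 via ZooKeeper service discovery (quorum and namespace taken from the config above)
beeline -u "jdbc:hive2://VECS01118:2181,VECS01119:2181,VECS01120:2181,VECS01121:2181,VECS00047:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2" -n hive -e "show databases;"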
dir.sh
#!/bin/bash
groupadd hadoop
groupadd hdfs
groupadd yarn
useradd -g hdfs -G hadoop hdfs
useradd -g yarn -G hadoop yarn
#usermod -a -G hadoop deploy
#echo "hdfs" | passwd --stdin hdfs
#echo "yarn" | passwd --stdin yarn
for i in 1 2 3 4 5 6 7 8 9 10 11 12
do
datadir=/data"$i"/HDATA/dfs/local
mrdir=/data"$i"/HDATA/mapred/local
yarndir=/data"$i"/HDATA/yarn/local
yarnlog=/data"$i"/HDATA/yarn/logs
mkdir -p $datadir
mkdir -p $mrdir
mkdir -p $yarndir
mkdir -p $yarnlog
echo "$datadir $mrdir $yarndir $yarnlog make over and chown hdfs:hadoop"
chown hdfs:hadoop -R $datadir
chown yarn:yarn -R $mrdir $yarndir $yarnlog
done
#log
#mkdir -p /data/dfs/nn/local /data/dfs/jn
#chown hdfs:hadoop /data/dfs/nn/local /data/dfs/jn
#mkdir -p /log/hadoop /log/yarn /log/yarn-log /log/balant /log/hadoop-datanode-log/ /app/hadoop/tmp /app/var/run/hadoop-hdfs
mkdir -p /app/hadoop/tmp /var/run/hadoop-hdfs/ /app/hadoop/log
mkdir -p /app/hadoop/log/gc/
#chown hdfs:hadoop /log/balant /log/hadoop-datanode-log/ /app/hadoop/tmp /app/var/run/hadoop-hdfs
#chown yarn:hadoop /log/yarn /log/yarn-log
chown -R hdfs:hadoop /app/hadoop*
chown hdfs:hadoop /app/hadoop/tmp /app/hadoop/log /app/hadoop/log/gc/
chown -R hdfs:hdfs /var/run/hadoop-hdfs/
chmod g+w /app/hadoop/tmp -R
chmod g+w /app/hadoop/log -R
chmod -R 777 /app/hadoop/tmp
chmod -R 777 /app/hadoop/log
#chmod -R 777 /app/hadoop/bin
#chmod -R 777 /app/hadoop/sbin
chmod -R 755 /app/hadoop*
chmod -R 777 /app/hadoop/tmp
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://flashHadoop</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:///app/hadoop/tmp/</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>VECS01120:2181,VECS01121:2181,VECS00047:2181,VECS01118:2181,VECS01119:2181</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.Lz4Codec</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>4320</value>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>180</value>
</property>
<property>
<name>fs.protected.directories</name>
<value>/user/hive/warehouse,/user/hive/warehouse/hello.db,/user/hive/warehouse/bikedw.db,/user/hive/warehouse/bikemk.db,/user/hive/warehouse/datagroup.db,/user/hive/warehouse/bike.db,/user/hive/warehouse/bikeoss.db,/user/hive/warehouse/bike_test.db,/dataware/ods,/dataware/dm,/dataware/dw,/user/hive/warehouse/tmp.db,/dataware/st,/dataware/dim,/dataware/stg,/user/hive/warehouse/hw.db,/user/hive/warehouse/tableau.db,/user/hive/warehouse/zb.db,/user/hive/warehouse/hada.db,/dataware/rb,/user/hive/warehouse/bos.db,/user/hive/warehouse/test.db,/user/hive/warehouse/bikeda.db,/user/hive/warehouse/biketableau.db,/user/hive/warehouse/biketmp.db,/user/hive/warehouse/bikedmt.db,/user/hive/warehouse/odssub.db,/user/hive/warehouse/siwe.db,/user/hive/warehouse/ev_schedule.db,/user/hive/warehouse/ev_analysis.db,/user/hive/warehouse/ev_tmp.db,/user/hive/warehouse/turing.db,/user/hive/warehouse/datagroup_rentxbike.db,/user/hive/warehouse/alpha.db,/user/hive/warehouse/hitch.db,/user/hive/dataware/cs_da,/user/hive/dataware/cs_da_dev,/user/hive/dataware/hitch_da,/user/hive/dataware/hitch_da_dev,/user/hive/dataware/hitch_dw,/user/hive/dataware/hitch_dw_dev,/user/hive/dataware/hr_dw,/user/hive/dataware/hr_dw_dev,/user/hive/dataware/fin_da,/user/hive/dataware/fin_da_dev,/user/hive/warehouse/dw_tmp.db,/user/hive/dataware/alphapay_dev,/user/hive/dataware/alphapay,/user/hive/warehouse/alpha_dev.db,/user/hive/warehouse/bike_dev.db,/user/hive/warehouse/bike_test_dev.db,/user/hive/warehouse/bikeda_dev.db,/user/hive/warehouse/bikedmt_dev.db,/user/hive/warehouse/bikedw_dev.db,/user/hive/warehouse/bikemk_dev.db,/user/hive/warehouse/bikeoss_dev.db,/user/hive/warehouse/biketableau_dev.db,/user/hive/warehouse/biketmp_dev.db,/user/hive/warehouse/bos_dev.db,/user/hive/warehouse/cs_da_dev_dev.db,/user/hive/warehouse/datagroup_dev.db,/user/hive/warehouse/default_dev.db,/user/hive/warehouse/dim_dev.db,/user/hive/warehouse/dm_dev.db,/user/hive/warehouse/dw_dev.db,/user/hive/warehouse/dw_tmp_dev.db,/user/hive/warehouse/ev_analysis_dev.db,/user/hive/warehouse/ev_schedule_dev.db,/user/hive/warehouse/ev_tmp_dev.db,/user/hive/warehouse/hada_dev.db,/user/hive/warehouse/hello_dev.db,/user/hive/warehouse/hitch_dev.db,/user/hive/warehouse/hw_dev.db,/user/hive/warehouse/ods_dev.db,/user/hive/warehouse/odssub_dev.db,/user/hive/warehouse/rb_dev.db,/user/hive/warehouse/siwe_dev.db,/user/hive/warehouse/st_dev.db,/user/hive/warehouse/stg_dev.db,/user/hive/warehouse/tableau_dev.db,/user/hive/warehouse/test_dev.db,/user/hive/warehouse/tmp_dev.db,/user/hive/warehouse/turing_dev.db,/user/hive/warehouse/zb_dev.db,/user/hive/dataware/spower_dw_dev,/user/hive/dataware/risk_dev,/user/hive/warehouse/datagroup_rentxbike_dev.db,/user/hive/warehouse/bikepd_dev.db,/user/hive/dataware/spower_dw,/user/hive/dataware/risk,/user/hive/dataware/fintech_da,/user/hive/dataware/fintech_da_dev,/user/hive/warehouse/bikepd.db,/user/hive/warehouse/zhaobo_test.db,/user/hive/warehouse/bikedata.db,/user/hive/warehouse/riskctrl_dev.db,/user/hive/warehouse/riskctrl.db,/user/hive/warehouse/aiplatform.db,/user/hive/warehouse/fin_dw.db,/user/hive/warehouse/maidian_history.db,/user/hive/warehouse/platbusiness.db,/user/hive/warehouse/aiplatform_dev.db,/user/hive/warehouse/devops.db,/user/hive/warehouse/devops_dev.db,/user/hive/warehouse/ev_bike.db,/user/hive/warehouse/platbusiness_dev.db,/user/hive/warehouse/pro_test.db,/user/hive/warehouse/bigdata_dev.db,/user/hive/warehouse/zhaobo_test.db
</value>
</property>
<!-- <property>
<name>net.topology.script.file.name</name>
<value>/apps/hadoop-conf/rack.sh</value>
</property>
-->
<!-- Timeout for the HealthMonitor NameNode check; default 50000 ms, raised to 5 minutes -->
<property>
<name>ha.health-monitor.rpc-timeout.ms</name>
<value>300000</value>
</property>
<!-- ZooKeeper session timeout for ZKFC failover; default 5000 ms, raised to 3 minutes -->
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>180000</value>
</property>
<!-- Change the HDFS umask in conjunction with enabling HDFS-Ranger -->
<property>
<name>fs.permissions.umask-mode</name>
<value>077</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>VECS00047,VECS01219,VECS05137,VECS07009,VECS07010,VECS07011,VECS09647,VECS07008,VECS011123,VECS011124,VECS011125,VECS011126,VECS011127</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.livy.hosts</name>
<value></value>
</property>
<property>
<name>hadoop.proxyuser.livy.groups</name>
<value>*</value>
</property>
</configuration>
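A few sanity checks for the settings above: fs.trash.interval=4320 minutes keeps deleted files in .Trash for 3 days, and directories listed in fs.protected.directories cannot be deleted while non-empty, even with -skipTrash. The file paths below are illustrative, not real data:
hdfs dfs -ls hdfs://flashHadoop/                      # fs.defaultFS resolves through the flashHadoop nameservice
hdfs dfs -rm /user/hive/warehouse/some_table/part-0   # moved to .Trash, purged after 3 days (example path)
hdfs dfs -rm -r -skipTrash /user/hive/warehouse       # rejected while non-empty: protected by fs.protected.directories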
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
--><!-- Put site-specific property overrides in this file. --><configuration>
<property>
<name>dfs.nameservices</name>
<value>flashHadoop</value>
</property>
<!-- flashHadoop -->
<property>
<name>dfs.ha.namenodes.flashHadoop</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.flashHadoop.nn1</name>
<value>VECS01118:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.flashHadoop.nn2</name>
<value>VECS01119:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.flashHadoop.nn1</name>
<value>VECS01118:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.flashHadoop.nn2</name>
<value>VECS01119:50070</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.flashHadoop</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir.flashHadoop</name>
<value>qjournal://VECS01120:8485;VECS01121:8485;VECS00047:8485;VECS01118:8485;VECS01119:8485/flashHadoop</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///data1/data/flashHadoop/namenode/,file:///data2/data/flashHadoop/namenode/</value>
</property>
<property>
<name>dfs.namenode.support.allow.format</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///data1/HDATA/dfs/local,
file:///data2/HDATA/dfs/local,
file:///data3/HDATA/dfs/local,
file:///data4/HDATA/dfs/local,
file:///data5/HDATA/dfs/local,
file:///data6/HDATA/dfs/local,
file:///data7/HDATA/dfs/local,
file:///data8/HDATA/dfs/local,
file:///data9/HDATA/dfs/local,
file:///data10/HDATA/dfs/local,
file:///data11/HDATA/dfs/local,
file:///data12/HDATA/dfs/local</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/data/flashHadoop/journal</value>
</property>
<property>
<name>dfs.qjournal.start-segment.timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>dfs.qjournal.prepare-recovery.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.accept-recovery.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.finalize-segment.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.select-input-streams.timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>dfs.qjournal.get-journal-state.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.new-epoch.timeout.ms</name>
<value>240000</value>
</property>
<property>
<name>dfs.qjournal.write-txns.timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
<description>Set to true to enable support for HDFS ACLs (Access Control Lists).</description>
</property>
<!-- Adjust to match the actual deployment -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>16384</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/app/hadoop/etc/hadoop/exclude.list</value>
<description> List of nodes to decommission </description>
</property>
<property>
<name>dfs.datanode.fsdataset.volume.choosing.policy</name>
<value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
<value>10737418240</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
<value>0.75</value>
</property>
<!-- 2018-06-19 disk parameter change: reserve ~1.4 TB per volume -->
<property>
<name>dfs.datanode.du.reserved</name>
<value>1503238553600</value>
<description>Reserved space in bytes per volume. Always leave this much space free for non dfs use. </description>
</property>
<property>
<name>dfs.datanode.failed.volumes.tolerated</name>
<value>1</value>
<description>The number of volumes that are allowed to fail before a datanode stops offering service. By default any volume failure will cause a datanode to shutdown. </description>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.size</name>
<value>1000</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name>
<value>10000</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/run/hadoop-hdfs/dn_socket</value>
</property>
<property>
<name>dfs.client.read.shortcircuit.skip.checksum</name>
<value>false</value>
</property>
<property>
<name>dfs.block.size</name>
<value>134217728</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>750</value>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>40</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<!-- FsImage checkpoint parameter tuning -->
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>3600</value>
</property>
<property>
<name>dfs.namenode.checkpoint.txns</name>
<value>500000</value>
</property>
<property>
<name>dfs.image.transfer.timeout</name>
<value>1800000</value>
</property>
<property>
<name>dfs.namenode.num.extra.edits.retained</name>
<value>1000000</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.inode.attributes.provider.class</name>
<value>org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer</value>
</property>
<!-- 2018-11-01: disable the HDFS FileSystem cache for Spark (fs close issue) -->
<property>
<name>fs.hdfs.impl.disable.cache</name>
<value>true</value>
</property>
<!-- Increase the number of DFS client retries when locating the last block while closing a file -->
<property>
<name>dfs.client.block.write.locateFollowingBlock.retries</name>
<value>10</value>
</property>
</configuration>
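For reference, a short operational sketch against this HA and decommission setup; the decommission hostname is a placeholder:
hdfs haadmin -getServiceState nn1    # active/standby state of the flashHadoop NameNodes
hdfs haadmin -getServiceState nn2
# Decommission a DataNode: append its hostname to the file referenced by dfs.hosts.exclude, then refresh
echo "VECSXXXXX" >> /app/hadoop/etc/hadoop/exclude.list    # placeholder hostname
hdfs dfsadmin -refreshNodes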
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle,spark_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
<value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>
<property>
<name>spark.shuffle.service.port</name>
<value>7337</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://flashHadoop/tmp/logs</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*
</value>
</property>
<!-- resourcemanager config -->
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>FLASH_YARN</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>VECS01121</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>VECS01120</value>
</property>
<!-- CapacityScheduler
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
CapacityScheduler End-->
<!-- FairScheduler -->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allow-undeclared-pools</name>
<value>false</value>
</property>
<!-- FairScheduler End-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
<value>5000</value>
</property>
<!-- File listing YARN NodeManagers to be decommissioned. -->
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/app/hadoop/etc/hadoop/yarn.exclude</value>
<final>true</final>
</property>
<!-- ZKRMStateStore config -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>VECS01120:2181,VECS01121:2181,VECS00047:2181,VECS01118:2181,VECS01119:2181</value>
</property>
<property>
<name>yarn.resourcemanager.zk.state-store.address</name>
<value>VECS01120:2181,VECS01121:2181,VECS00047:2181,VECS01118:2181,VECS01119:2181</value>
</property>
<!-- applications manager interface -->
<!-- Clients use this address to submit application operations to the RM -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>VECS01121:23140</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>VECS01120:23140</value>
</property>
<!-- scheduler interface -->
<!-- Address through which resources are requested from the RM scheduler -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>VECS01121:23130</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>VECS01120:23130</value>
</property>
<!-- RM admin interface -->
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>VECS01121:23141</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>VECS01120:23141</value>
</property>
<!-- RM resource-tracker interface: NodeManagers report heartbeats to the RM and receive tasks -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>VECS01121:23125</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>VECS01120:23125</value>
</property>
<!-- RM web application interface -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>VECS01121:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>VECS01120:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>VECS01121:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>VECS01120:23189</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://VECS01121:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>VECS01121:54315</value>
</property>
<!-- Node Manager Configs -->
<property>
<description>Address where the localizer IPC is.</description>
<name>yarn.nodemanager.localizer.address</name>
<value>0.0.0.0:23344</value>
</property>
<property>
<description>NM Webapp address.</description>
<name>yarn.nodemanager.webapp.address</name>
<value>0.0.0.0:8042</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>file:///data12/HDATA/yarn/local,
file:///data11/HDATA/yarn/local,
file:///data10/HDATA/yarn/local,
file:///data9/HDATA/yarn/local,
file:///data8/HDATA/yarn/local,
file:///data7/HDATA/yarn/local,
file:///data6/HDATA/yarn/local,
file:///data5/HDATA/yarn/local,
file:///data4/HDATA/yarn/local,
file:///data3/HDATA/yarn/local,
file:///data2/HDATA/yarn/local,
file:///data1/HDATA/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>file:///data12/HDATA/yarn/logs,
file:///data11/HDATA/yarn/logs,
file:///data10/HDATA/yarn/logs,
file:///data9/HDATA/yarn/logs,
file:///data8/HDATA/yarn/logs,
file:///data7/HDATA/yarn/logs,
file:///data6/HDATA/yarn/logs,
file:///data5/HDATA/yarn/logs,
file:///data4/HDATA/yarn/logs,
file:///data3/HDATA/yarn/logs,
file:///data2/HDATA/yarn/logs,
file:///data1/HDATA/yarn/logs</value>
</property>
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>1200</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>23080</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
<value>true</value>
</property>
<!-- tuning -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>117760</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>60</value>
</property>
<!-- tuning yarn container -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>16384</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4</value>
<description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- Newly added features (node labels) -->
<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>hdfs://flashHadoop/yarn/yarn-node-labels/</value>
</property>
<!-- Enable the LinuxContainerExecutor -->
<property>
<name>yarn.nodemanager.container-executor.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users</name>
<value>false</value>
</property>
<!-- timeline server -->
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.http-cross-origin.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.max-applications</name>
<value>50000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size</name>
<value>50000</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size</name>
<value>50000</value>
</property>
<property>
<name>yarn.resourcemanager.max-completed-applications</name>
<value>50000</value>
</property>
<property>
<name>yarn.timeline-service.hostname</name>
<value>VECS01121</value>
</property>
<property>
<name>yarn.timeline-service.handler-thread-count</name>
<value>10</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.path</name>
<value>/app/hadoop/tmp/yarn/timeline/</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-state-store.path</name>
<value>/app/hadoop/tmp/yarn/timeline/timeline-state-store.ldb</value>
</property>
<!-- ResourceManager tuning -->
<property>
<name>yarn.resourcemanager.client.thread-count</name>
<value>150</value>
</property>
<property>
<name>yarn.resourcemanager.amlauncher.thread-count</name>
<value>150</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>150</value>
</property>
</configuration>
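A matching operational sketch for the RM HA, node-label, and NodeManager decommission settings above; the label and hostname are placeholders:
yarn rmadmin -getServiceState rm1                 # which of VECS01121/VECS01120 currently holds the active RM
yarn rmadmin -addToClusterNodeLabels "testlabel"  # placeholder label; persisted under yarn.node-labels.fs-store.root-dir
yarn cluster --list-node-labels                   # on releases that ship the "yarn cluster" subcommand
# Decommission a NodeManager: add it to yarn.exclude, then refresh
echo "VECSXXXXX" >> /app/hadoop/etc/hadoop/yarn.exclude     # placeholder hostname
yarn rmadmin -refreshNodes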
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>VECS01121:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>VECS01121:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
<!-- tuning mapreduce -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1536m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70 -Dfile.encoding=UTF-8</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>6144</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx4608m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70 -Dfile.encoding=UTF-8</value>
</property>
<property>
<name>mapreduce.map.cpu.vcores</name>
<value>1</value>
</property>
<property>
<name>mapreduce.reduce.cpu.vcores</name>
<value>2</value>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value>file:///data8/HDATA/mapred/local,
file:///data7/HDATA/mapred/local,
file:///data6/HDATA/mapred/local,
file:///data5/HDATA/mapred/local,
file:///data4/HDATA/mapred/local,
file:///data3/HDATA/mapred/local,
file:///data2/HDATA/mapred/local,
file:///data1/HDATA/mapred/local</value>
</property>
<!-- Map, shuffle, and reduce tuning -->
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>300</value>
</property>
<!-- 30*10=io.sort.mb -->
<property>
<name>mapreduce.jobhistory.max-age-ms</name>
<value>1296000000</value>
<source>mapred-default.xml</source>
</property>
<property>
<name>mapreduce.jobhistory.joblist.cache.size</name>
<value>200000</value>
<source>mapred-default.xml</source>
</property>
<property>
<name>mapreduce.input.fileinputformat.input.dir.recursive</name>
<value>true</value>
</property>
</configuration>
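A quick smoke test for this MapReduce-on-YARN setup; the examples jar path depends on the Hadoop layout and is an assumption:
hadoop jar /app/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 5 100   # assumed jar location
mapred job -list                                     # jobs currently running on YARN
curl http://VECS01121:19888/ws/v1/history/info       # JobHistory server from mapreduce.jobhistory.webapp.address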
Presto hive.properties
connector.name=hive-hadoop2
hive.metastore.uri=thrift://VECS01219:9083,thrift://VECS00047:9083
hive.config.resources=/app/hadoop/etc/hadoop/core-site.xml,/app/hadoop/etc/hadoop/hdfs-site.xml
hive.metastore-cache-ttl=0s
hive.metastore-refresh-interval=1s
hive.parquet.use-column-names=true
hive.recursive-directories=true
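A hypothetical query through this hive catalog from the Presto CLI; the coordinator address is a placeholder, not part of the recorded config:
presto --server http://presto-coordinator:8080 --catalog hive --schema default --execute "SHOW TABLES"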
hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
<property>
<name>hbase.rest.port</name>
<value>60050</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://hbaseHadoopdc/hbase</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>vecs00994.hello-it.top:2181,vecs00995.hello-it.top:2181,vecs00996.hello-it.top:2181,vecs00997.hello-it.top:2181,vecs00998.hello-it.top:2181</value>
</property>
<property>
<name>zookeeper.session.timeout</name>
<value>120000</value>
</property>
<!-- replication -->
<property>
<name>hbase.replication</name>
<value>true</value>
</property>
<property>
<name>replication.source.ratio</name>
<value>1</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>200</value>
</property>
<property>
<name>hbase.hregion.majorcompaction</name>
<value>0</value>
</property>
<property>
<name>hbase.hstore.compactionThreshold</name>
<value>6</value>
</property>
<property>
<name>hbase.hstore.blockingStoreFiles</name>
<value>100</value>
</property>
<property>
<name>hbase.hregion.memstore.block.multiplier</name>
<value>4</value>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>21474836480</value>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>67108864</value>
</property>
<property>
<name>dfs.datanode.handler.count</name>
<value>40</value>
</property>
<property>
<name>hbase.regionserver.maxlogs</name>
<value>256</value>
</property>
<property>
<name>hbase.regionserver.hlog.splitlog.writer.threads</name>
<value>10</value>
</property>
<!-- Memstore tuning-->
<property>
<name>hbase.regionserver.global.memstore.size</name>
<value>0.50</value>
</property>
<property>
<name>hbase.regionserver.global.memstore.size.lower.limit</name>
<value>0.90</value>
</property>
<!-- Periodic memstore flush interval: 10 hours (36000000 ms) -->
<property>
<name>hbase.regionserver.optionalcacheflushinterval</name>
<value>36000000</value>
</property>
<!-- Set small/large compaction thread counts -->
<property>
<name>hbase.regionserver.thread.compaction.small</name>
<value>5</value>
</property>
<property>
<name>hbase.regionserver.thread.compaction.large</name>
<value>5</value>
</property>
<property>
<name>hbase.bucketcache.ioengine</name>
<value>offheap</value>
</property>
<!-- BucketCache size: 20480 MB (20 GB) -->
<property>
<name>hbase.bucketcache.size</name>
<value>20480</value>
</property>
<property>
<name>hfile.block.cache.size</name>
<value>0.30</value>
</property>
<!-- Enable hedged reads -->
<property>
<name>dfs.client.hedged.read.threadpool.size</name>
<value>10</value>
</property>
<property>
<name>dfs.client.hedged.read.threshold.millis</name>
<value>500</value>
</property>
<property>
<name>hbase.ipc.server.max.callqueue.size</name>
<value>2147483647</value>
</property>
<property>
<name>hbase.regionserver.region.split.policy</name>
<value>org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy</value>
<description>
A split policy determines when a region should be split. The various other split policies that
are available currently are ConstantSizeRegionSplitPolicy, DisabledRegionSplitPolicy,
DelimitedKeyPrefixRegionSplitPolicy, KeyPrefixRegionSplitPolicy etc.
</description>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value>org.apache.hadoop.hbase.group.GroupAdminEndpoint</value>
</property>
<property>
<name>hbase.master.loadbalancer.class</name>
<value>org.apache.hadoop.hbase.group.GroupBasedLoadBalancer</value>
</property>
<property>
<name>hbase.coprocessor.user.region.classes</name>
<value>org.apache.hadoop.hbase.coprocessor.AggregateImplementation</value>
</property>
<property>
<name>hbase.master.ui.readonly</name>
<value>true</value>
</property>
<property>
<name>hbase.wal.provider</name>
<value>multiwal</value>
</property>
<property>
<name>hbase.wal.regiongrouping.strategy</name>
<value>bounded</value>
</property>
<property>
<name>hbase.wal.regiongrouping.numgroups</name>
<value>1</value>
</property>
<property>
<name>hbase.hlog.asyncer.number</name>
<value>16</value>
</property>
<property>
<name>hbase.wal.storage.policy</name>
<value>ALL_SSD</value>
</property>
</configuration>
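A small hbase shell sketch for checking the replication settings above; run it as a user with read access to the cluster:
hbase shell <<'EOF'
status 'replication'
list_peers
EOF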
hbase-env.sh
#
#/**
# * Copyright 2007 The Apache Software Foundation
# *
# * Licensed to the Apache Software Foundation (ASF) under one
# * or more contributor license agreements. See the NOTICE file
# * distributed with this work for additional information
# * regarding copyright ownership. The ASF licenses this file
# * to you under the Apache License, Version 2.0 (the
# * "License"); you may not use this file except in compliance
# * with the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# Set environment variables here.
# This script sets variables multiple times over the course of starting an hbase process,
# so try to keep things idempotent unless you want to take an even deeper look
# into the startup scripts (bin/hbase, etc.)
# The java implementation to use. Java 1.6 required.
# export JAVA_HOME=/usr/java/jdk1.6.0/
export JAVA_HOME=/app/jdk/
# Extra Java CLASSPATH elements. Optional.
# export HBASE_CLASSPATH=
# The maximum amount of heap to use, in MB. Default is 1000.
# export HBASE_HEAPSIZE=1000
# Extra Java runtime options.
# Below are what we set by default. May only work with SUN JVM.
# For more on why as well as other possible settings,
# see http://wiki.apache.org/hadoop/PerformanceTuning
export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
# Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
# This enables basic gc logging to the .out file.
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
# Uncomment one of the below three options to enable java garbage collection logging for the client processes.
# This enables basic gc logging to the .out file.
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
# This enables basic gc logging to its own file.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
# This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
# If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
# export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
# Uncomment below if you intend to use the EXPERIMENTAL off heap cache.
# export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize="
# Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value.
# Uncomment and adjust to enable JMX exporting
# See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
# More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
# NOTE: HBase provides an alternative JMX implementation to fix the random ports issue, please see JMX
# section in HBase Reference Guide for instructions.
# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
# export HBASE_REST_OPTS="$HBASE_REST_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10105"
# File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default.
# export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers
# Uncomment and adjust to keep all the Region Server pages mapped to be memory resident
#HBASE_REGIONSERVER_MLOCK=true
#HBASE_REGIONSERVER_UID="hbase"
# File naming hosts on which backup HMaster will run. $HBASE_HOME/conf/backup-masters by default.
# export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters
# Extra ssh options. Empty by default.
# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
# Where log files are stored. $HBASE_HOME/logs by default.
# export HBASE_LOG_DIR=${HBASE_HOME}/logs
# Enable remote JDWP debugging of major HBase processes. Meant for Core Developers
# export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070"
# export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071"
# export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072"
# export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073"
# A string representing this instance of hbase. $USER by default.
# export HBASE_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HBASE_NICENESS=10
# The directory where pid files are stored. /tmp by default.
# export HBASE_PID_DIR=/var/hadoop/pids
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HBASE_SLAVE_SLEEP=0.1
# Tell HBase whether it should manage it's own instance of Zookeeper or not.
# export HBASE_MANAGES_ZK=true
# The default log rolling policy is RFA, where the log file is rolled as per the size defined for the
# RFA appender. Please refer to the log4j.properties file to see more details on this appender.
# In case one needs to do log rolling on a date change, one should set the environment property
# HBASE_ROOT_LOGGER to "<DESIRED_LOG LEVEL>,DRFA".
# For example:
# HBASE_ROOT_LOGGER=INFO,DRFA
# The reason for changing default to RFA is to avoid the boundary case of filling out disk space as
# DRFA doesn't put any cap on the log size. Please refer to HBase-5655 for more context.
export HBASE_LOG_DIR=/app/hbase/log
export HBASE_PID_DIR=/app/hbase/tmp
export HBASE_HEAPSIZE=16384
export HBASE_OFFHEAPSIZE=25g
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xmx16g -Xms16g -Xmn4g -Xss256k -XX:MaxPermSize=256m -XX:SurvivorRatio=2 -XX:+UseParNewGC -XX:ParallelGCThreads=12 -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=16 -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:/app/hbase/log/gc/gc-hbase-hmaster-`hostname`.log"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xmx30g -Xms30g -Xmn2g -Xss256k -XX:MaxPermSize=256m -XX:SurvivorRatio=2 -XX:+UseParNewGC -XX:ParallelGCThreads=12 -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=16 -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:+HeapDumpOnOutOfMemoryError -verbose:gc -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/app/hbase/log/gc/gc-hbase-regionserver-`hostname`.log"
export HBASE_REGIONSERVER_OPTS= "$HBASE_REGIONSERVER_OPTS -Xmx32g -Xms32g -Xmn6g -Xss256k -XX:MaxPermSize=384m -XX:SurvivorRatio=6 -XX:+UseParNewGC -XX:ParallelGCThreads=10 -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=16 -XX:+CMSParallelRemarkEnabled -XX:+UseCMSCompactAtFullCollection -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:CMSMaxAbortablePrecleanTime=5000 -XX:CMSFullGCsBeforeCompaction=5 -XX:+CMSClassUnloadingEnabled -XX:+HeapDumpOnOutOfMemoryError -verbose:gc -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/app/hbase/log/gc/gc-hbase-regionserver-`hostname`.log"
export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20101"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20102"
export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20103"
export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20104"
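The -Xloggc paths and HBASE_LOG_DIR/HBASE_PID_DIR above assume those directories already exist; a minimal preparation step before starting the daemons (the hbase service account is an assumption, adjust to the actual user):
mkdir -p /app/hbase/log/gc /app/hbase/tmp
chown -R hbase:hbase /app/hbase    # assumed service account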
hbase-env.sh_G1
# (The Apache license header and the stock hbase-env.sh template comments are identical to hbase-env.sh above and are omitted here; in this G1 variant, HBASE_OPTS="-XX:+UseConcMarkSweepGC" stays commented out. Only the active settings below differ.)
export JAVA_HOME=/app/jdk/
export HBASE_LOG_DIR=/app/hbase/log
export HBASE_PID_DIR=/app/hbase/tmp
export HBASE_HEAPSIZE=16384
export HBASE_OFFHEAPSIZE=25g
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xmx16g -Xms16g -Xmn4g -Xss256k -XX:MaxPermSize=256m -XX:SurvivorRatio=2 -XX:+UseParNewGC -XX:ParallelGCThreads=12 -XX:+UseConcMarkSweepGC -XX:ParallelCMSThreads=16 -XX:+CMSParallelRemarkEnabled -XX:MaxTenuringThreshold=15 -XX:+UseCMSCompactAtFullCollection -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=70 -XX:-DisableExplicitGC -XX:+HeapDumpOnOutOfMemoryError -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:/app/hbase/log/gc/gc-hbase-hmaster-`hostname`.log"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:+UseG1GC -Xmx30g -Xms30g -XX:+UnlockExperimentalVMOptions -XX:MaxGCPauseMillis=100 -XX:-ResizePLAB -XX:+ParallelRefProcEnabled -XX:+AlwaysPreTouch -XX:ParallelGCThreads=16 -XX:ConcGCThreads=8 -XX:G1HeapWastePercent=3 -XX:InitiatingHeapOccupancyPercent=35 -XX:G1MixedGCLiveThresholdPercent=85 -XX:G1NewSizePercent=1 -XX:G1MaxNewSizePercent=10 -XX:MaxDirectMemorySize=25g -verbose:gc -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCApplicationStoppedTime -XX:+PrintHeapAtGC -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:PrintSafepointStatisticsCount=1 -XX:PrintFLSStatistics=1 -Xloggc:/app/hbase/log/gc/gc-hbase-regionserver-`hostname`.log"
export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20101"
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20102"
export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20103"
export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=20104"
Tez tuning
hive.server2.tez.default.queues=default
hive.server2.tez.initialize.default.sessions=false
hive.server2.tez.session.lifetime=162h
hive.server2.tez.session.lifetime.jitter=3h
hive.server2.tez.sessions.init.threads=16
hive.server2.tez.sessions.per.default.queue=10
hive.tez.auto.reducer.parallelism=false
hive.tez.bucket.pruning=false
hive.tez.bucket.pruning.compat=true
hive.tez.container.max.java.heap.fraction=0.8
hive.tez.container.size=-1
hive.tez.cpu.vcores=-1
hive.tez.dynamic.partition.pruning=true
hive.tez.dynamic.partition.pruning.max.data.size=104857600
hive.tez.dynamic.partition.pruning.max.event.size=1048576
hive.tez.enable.memory.manager=true
hive.tez.exec.inplace.progress=true
hive.tez.exec.print.summary=false
hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat
hive.tez.input.generate.consistent.splits=true
hive.tez.log.level=INFO
hive.tez.max.partition.factor=2.0
hive.tez.min.partition.factor=0.25
hive.tez.smb.number.waves=0.5
hive.tez.task.scale.memory.reserve-fraction.min=0.3
hive.tez.task.scale.memory.reserve.fraction=-1.0
hive.tez.task.scale.memory.reserve.fraction.max=0.5
yarn.scheduler.fair.preemption=true
yarn.scheduler.fair.preemption.cluster-utilization-threshold=0.7
yarn.scheduler.maximum-allocation-mb=32768
yarn.scheduler.maximum-allocation-vcores=4
yarn.scheduler.minimum-allocation-mb=2048
yarn.scheduler.minimum-allocation-vcores=1
yarn.resourcemanager.scheduler.address=${yarn.resourcemanager.hostname}:8030
yarn.resourcemanager.scheduler.class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler
yarn.resourcemanager.scheduler.client.thread-count=50
yarn.resourcemanager.scheduler.monitor.enable=false
yarn.resourcemanager.scheduler.monitor.policies=org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy
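Because hive.*, mapred.*, tez.* and queue.* are whitelisted in hive.security.authorization.sqlstd.confwhitelist (see the hs2 config above), most of these Tez knobs can also be overridden per session rather than cluster-wide. A hedged example, with $HS2_URL standing in for the HiveServer2 JDBC URL shown earlier and illustrative values:
beeline -u "$HS2_URL" -e "set hive.tez.container.size=4096; set hive.tez.auto.reducer.parallelism=true; select 1;"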