4. Resources to include in the project

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>project_traffic</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.18</version>
        </dependency>
    </dependencies>
</project>
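
With these three dependencies in place, a minimal sketch of an entry point that exercises all of them might look like the following; the object, database, and table names here are hypothetical, not part of the original project:

import org.apache.spark.sql.SparkSession

object TrafficApp {
  def main(args: Array[String]): Unit = {
    // enableHiveSupport() comes from spark-hive_2.11; hive-site.xml
    // must be on the classpath for the metastore to be found
    val spark = SparkSession.builder()
      .appName("project_traffic")
      .enableHiveSupport()
      .getOrCreate()

    // Query a Hive table (hypothetical database/table)
    val df = spark.sql("select * from traffic.monitor_flow")

    // Persist results to MySQL through mysql-connector-java
    df.write
      .format("jdbc")
      .option("url", "jdbc:mysql://192.168.200.111:3306/traffic?serverTimezone=UTC")
      .option("driver", "com.mysql.cj.jdbc.Driver")
      .option("dbtable", "monitor_flow_result")
      .option("user", "root")
      .option("password", "Jsq123456...")
      .mode("append")
      .save()

    spark.stop()
  }
}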

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Add the following settings inside the configuration tag -->
  <!-- Default filesystem: the logical HA nameservice, not a single NameNode host -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://HadoopCluster</value>
  </property>
  <!-- Storage directory for files Hadoop generates at runtime (HDFS-related data) -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/app/hadoop-2.8.5/metaData</value>
  </property>
  <!-- ZooKeeper quorum used for automatic NameNode failover -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>node1:2181,node2:2181,node3:2181</value>
  </property>
</configuration>
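
Because fs.defaultFS points at the logical nameservice HadoopCluster rather than a single host, client code never hard-codes a NameNode. A minimal sketch, assuming core-site.xml and hdfs-site.xml are on the classpath (the path listed is hypothetical):

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HdfsClientCheck {
  def main(args: Array[String]): Unit = {
    // new Configuration() loads core-site.xml / hdfs-site.xml from the
    // classpath, so "HadoopCluster" resolves to whichever NameNode is active
    val conf = new Configuration()
    val fs = FileSystem.get(new URI("hdfs://HadoopCluster"), conf)

    // List a directory through the HA nameservice
    fs.listStatus(new Path("/user/hive/warehouse"))
      .foreach(s => println(s.getPath))

    fs.close()
  }
}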

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
      <name>dfs.replication</name>
      <value>3</value>
  </property>
  <!-- SecondaryNameNode address (with HA enabled, the standby NameNode handles checkpointing, so this is effectively unused) -->
  <property>
      <name>dfs.namenode.secondary.http-address</name>
      <value>node3:50090</value>
  </property>
  <!-- Logical name of the fully distributed cluster (the nameservice) -->
  <property>
      <name>dfs.nameservices</name>
      <value>HadoopCluster</value>
  </property>
  <!-- NameNodes that make up the nameservice -->
  <property>
      <name>dfs.ha.namenodes.HadoopCluster</name>
      <value>nn1,nn2</value>
  </property>
  <!-- RPC address of nn1 -->
  <property>
      <name>dfs.namenode.rpc-address.HadoopCluster.nn1</name>
      <value>node1:9000</value>
  </property>
  <!-- RPC address of nn2 -->
  <property>
      <name>dfs.namenode.rpc-address.HadoopCluster.nn2</name>
      <value>node2:9000</value>
  </property>
  <!-- HTTP address of nn1 -->
  <property>
      <name>dfs.namenode.http-address.HadoopCluster.nn1</name>
      <value>node1:50070</value>
  </property>
  <!-- HTTP address of nn2 -->
  <property>
      <name>dfs.namenode.http-address.HadoopCluster.nn2</name>
      <value>node2:50070</value>
  </property>
  <!-- Where NameNode edits are shared on the JournalNodes -->
  <property>
      <name>dfs.namenode.shared.edits.dir</name>
      <value>qjournal://node1:8485;node2:8485;node3:8485/HadoopCluster</value>
  </property>
  <!-- Fencing method: guarantees only one NameNode serves clients at a time -->
  <property>
      <name>dfs.ha.fencing.methods</name>
      <value>sshfence</value>
  </property>
  <!-- sshfence requires passwordless SSH between the NameNodes -->
  <property>
      <name>dfs.ha.fencing.ssh.private-key-files</name>
      <value>/root/.ssh/id_rsa</value>
  </property>
  <!-- JournalNode local storage directory -->
  <property>
      <name>dfs.journalnode.edits.dir</name>
      <value>/opt/app/hadoop/journalnodeData</value>
  </property>
  <!-- Disable HDFS permission checks -->
  <property>
      <name>dfs.permissions.enabled</name>
      <value>false</value>
  </property>
  <!-- Failover proxy provider: how clients locate the active NameNode and fail over automatically -->
  <property>
      <name>dfs.client.failover.proxy.provider.HadoopCluster</name>
      <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Enable automatic failover through ZooKeeper (ZKFC) -->
  <property>
      <name>dfs.ha.automatic-failover.enabled</name>
      <value>true</value>
  </property>
</configuration>
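
If a client (for example a Spark job launched from a development machine) does not have these XML files on its classpath, the same HA settings can be mirrored programmatically. A sketch under that assumption, using only values already configured above; the object and method names are hypothetical:

import org.apache.hadoop.conf.Configuration

object HaClientConf {
  // Rebuild the minimal hdfs-site.xml HA settings in code so a client
  // without the XML files can still resolve the HadoopCluster nameservice
  def build(): Configuration = {
    val conf = new Configuration()
    conf.set("fs.defaultFS", "hdfs://HadoopCluster")
    conf.set("dfs.nameservices", "HadoopCluster")
    conf.set("dfs.ha.namenodes.HadoopCluster", "nn1,nn2")
    conf.set("dfs.namenode.rpc-address.HadoopCluster.nn1", "node1:9000")
    conf.set("dfs.namenode.rpc-address.HadoopCluster.nn2", "node2:9000")
    conf.set("dfs.client.failover.proxy.provider.HadoopCluster",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    conf
  }
}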

hive-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
	  <name>javax.jdo.option.ConnectionURL</name>
	  <value>jdbc:mysql://192.168.200.111:3306/hive_metastore?serverTimezone=UTC&amp;createDatabaseIfNotExist=true</value>
	  <description>JDBC connect string for a JDBC metastore</description>
	</property>
 
	<property>
	  <name>javax.jdo.option.ConnectionDriverName</name>
	  <value>com.mysql.cj.jdbc.Driver</value>
	  <description>Driver class name for a JDBC metastore (com.mysql.cj.jdbc.Driver matches Connector/J 8.x)</description>
	</property>
 
	<property>
	  <name>javax.jdo.option.ConnectionUserName</name>
	  <value>root</value>
	  <description>username to use against metastore database</description>
	</property>
 
	<property>
	  <name>javax.jdo.option.ConnectionPassword</name>
	  <value>Jsq123456...</value>
	  <description>password to use against metastore database</description>
	</property>
	<property> 
  	  <name>hive.metastore.warehouse.dir</name> 
	  <value>hdfs://HadoopCluster/user/hive/warehouse</value>
	  <description>location of default database for the warehouse</description> 
	</property>
	<!-- CLI display options: show column headers and the current database name -->
	<property>
	  <!-- Print column headers in query results -->
	  <name>hive.cli.print.header</name>
	  <value>true</value>
	</property>
	<property>
	  <!-- Show the current database name in the CLI prompt -->
	  <name>hive.cli.print.current.db</name>
	  <value>true</value>
	</property>
</configuration>
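
Before launching Hive, it can be worth confirming that the metastore database is reachable with exactly the URL, driver, and credentials configured above. A minimal plain-JDBC sketch (the object name is hypothetical):

import java.sql.DriverManager

object MetastoreCheck {
  def main(args: Array[String]): Unit = {
    // Same URL and credentials as hive-site.xml; the cj driver class
    // matches the mysql-connector-java 8.x artifact from pom.xml
    Class.forName("com.mysql.cj.jdbc.Driver")
    val conn = DriverManager.getConnection(
      "jdbc:mysql://192.168.200.111:3306/hive_metastore?serverTimezone=UTC",
      "root", "Jsq123456...")
    println(s"metastore database reachable: ${!conn.isClosed}")
    conn.close()
  }
}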