Spark 之使用 Spark SQL 操作 Hive 的 Scala 程序实现
依赖
<!-- Spark's Hive integration module (artifact spark-hive_2.11, version 2.1.3). -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.11</artifactId>
    <version>2.1.3</version>
</dependency>
scala代码
package com.zy.sparksql

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession

/**
 * Operates on Hive through Spark SQL.
 *
 * Place hive-site.xml in the resources directory so that the metastore information is
 * written to the MySQL instance configured there.
 */
object HiveSupport {

  /**
   * Builds a local, Hive-enabled SparkSession, prints the rows of the `person` table and
   * stops the session.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Create the SparkSession with Hive support enabled.
    val sparkSession: SparkSession = SparkSession
      .builder()
      .appName("HiveSupport")
      .master("local[2]")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Get the SparkContext and lower the log level to WARN.
      val sc: SparkContext = sparkSession.sparkContext
      sc.setLogLevel("WARN")

      // Operate on Hive. The two statements below create and populate the table; they are
      // left commented out, as in the original.
      // sparkSession.sql("create table if not exists person(id int,name string,age int) row format delimited fields terminated by ','")
      // sparkSession.sql("load data local inpath './data/person.txt' into table person")
      sparkSession.sql("select * from person").show()
    } finally {
      // Stop the session even when the query above throws, so it is never left running.
      sparkSession.stop()
    }
  }
}
hive-site.xml
<!-- Hive metastore connection settings: a MySQL database accessed over JDBC. -->
<configuration>
    <!-- JDBC URL of the metastore database. -->
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.44.31:3306/hive?createDatabaseIfNotExist=true</value>
        <description>JDBC connect string for a JDBC metastore</description>
    </property>
    <!-- JDBC driver class used for the metastore connection. -->
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
        <description>Driver class name for a JDBC metastore</description>
    </property>
    <!-- Metastore database credentials (stored here in plain text). -->
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
        <description>username to use against metastore database</description>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
        <description>password to use against metastore database</description>
    </property>
</configuration>
还需要把 HDFS 上的 /user/hive/warehouse 目录权限改为 777(例如执行 hdfs dfs -chmod 777 /user/hive/warehouse),否则程序没有访问权限,运行时会报错