Scala: load CSV data into Hive via Spark 2.1
Code:
package com.liupu

import org.apache.spark.sql.SparkSession

/**
 * Loads a CSV file into Hive as an ORC-backed external table.
 *
 * Steps:
 *   1. Read `/tmp/cars.csv` (header row, inferred schema).
 *   2. Project the `year` and `model` columns.
 *   3. Write the projection as ORC files under `/tmp/hive_cars`.
 *   4. Register an external Hive table over that location and print its rows.
 *
 * Object name is kept as `HiveContextLoadCsv` for backward compatibility with
 * existing spark-submit invocations, even though it now uses SparkSession.
 */
object HiveContextLoadCsv {
  def main(args: Array[String]): Unit = {
    // SparkSession is the Spark 2.x entry point; enableHiveSupport() replaces
    // the deprecated HiveContext. An explicit appName is required when the
    // launcher does not supply one.
    val spark = SparkSession
      .builder()
      .appName("HiveContextLoadCsv")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Spark 2.x has a built-in CSV reader; the external
      // com.databricks.spark.csv package is no longer needed.
      val df = spark.read
        .option("header", "true")
        .option("inferSchema", "true")
        .csv("/tmp/cars.csv")

      val selectedData = df.select("year", "model")

      // NOTE: "header" is a CSV-only write option; ORC ignores it, so it is
      // intentionally dropped here.
      selectedData.write.format("orc").save("/tmp/hive_cars")

      spark.sql(
        "create external table hive_cars(year int,model string) " +
          "stored as orc location '/tmp/hive_cars'")

      spark.sql("show tables").collect().foreach(println)
      spark.sql("select * from hive_cars").collect().foreach(println)
    } finally {
      // Always release the session, even if a stage above fails.
      spark.stop()
    }
  }
}
Spark-submit command:
# Submit the job locally, using all available cores.
./spark-submit \
  --class com.liupu.HiveContextLoadCsv \
  --master local[*] \
  /home/pl62716/scalaTest.jar