Scala: load CSV data into Hive via Spark 2.1, passing parameters — all Hive column types
prepare CSV data
col_TINYINT,col_SMALLINT,col_BIGINT,col_INT,col_FLOAT,col_DOUBLE,col_DECIMAL,col_TIMESTAMP,col_DATE,col_INTERVAL,col_STRING,col_VARCHAR,col_CHAR,col_BOOLEAN,col_BINARY 100Y,100S,100L,100,1.11,2.22,4.44,2013-01-01 01:50:50,2013-01-01,2 SECONDS,"""123""",ab,a,TRUE,111
create CSV file
[Dev root @ sd-9c1f-2eac /tmp/pl62716] # vi CsvLoadToHive.csv col_TINYINT,col_SMALLINT,col_BIGINT,col_INT,col_FLOAT,col_DOUBLE,col_DECIMAL,col_TIMESTAMP,col_DATE,col_INTERVAL,col_STRING,col_VARCHAR,col_CHAR,col_BOOLEAN,col_BINARY 100Y,100S,100L,100,1.11,2.22,4.44,2013-01-01 01:50:50,2013-01-01,2 SECONDS,"""123""",ab,a,TRUE,111
scala test code
package com.liupu

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.orc._

/** Loads a CSV file into an ORC-backed external Hive table.
  *
  * Command-line arguments:
  *   args(0) — source CSV path
  *   args(1) — target directory for the ORC output
  *   args(2) — Hive table name to create over that location
  */
object LoadCsv3 {
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
    require(
      args.length >= 3,
      "Usage: LoadCsv3 <sourceCsvPath> <targetPath> <hiveTableName>")

    // Never reassigned — vals, not vars.
    val sourceCsvPath = args(0)
    val targetPath    = args(1)
    val hiveTableName = args(2)

    // Give the context an explicit app name via SparkConf (the no-arg
    // constructor relied entirely on external configuration).
    val sc          = new SparkContext(new SparkConf().setAppName("LoadCsv3"))
    val hiveContext = new HiveContext(sc)

    // spark-csv: first row is the header; column types are inferred.
    val df = hiveContext.read
      .format("com.databricks.spark.csv")
      .option("header", "true")
      .option("inferSchema", "true")
      .load(sourceCsvPath)

    val selectedData = df.select(
      "col_TINYINT", "col_SMALLINT", "col_BIGINT", "col_INT",
      "col_FLOAT", "col_DOUBLE", "col_DECIMAL", "col_TIMESTAMP",
      "col_DATE", "col_INTERVAL", "col_STRING", "col_VARCHAR",
      "col_CHAR", "col_BOOLEAN", "col_BINARY")

    // Persist as ORC so the external table below can point at it.
    // (The "header" option is meaningless for ORC and has been dropped.)
    selectedData.write.format("orc").save(targetPath)

    // DDL fixes versus the original:
    //  - Hive requires a length for VARCHAR/CHAR; bare VARCHAR / CHAR fails to parse.
    //  - Bare DECIMAL defaults to DECIMAL(10,0), which would drop the sample
    //    data's fractional digits; use an explicit precision/scale.
    //  - Hive has no INTERVAL column type; store that column as STRING.
    hiveContext.sql(
      s"""CREATE EXTERNAL TABLE IF NOT EXISTS $hiveTableName(
         |  col_TINYINT   TINYINT,
         |  col_SMALLINT  SMALLINT,
         |  col_BIGINT    BIGINT,
         |  col_INT       INT,
         |  col_FLOAT     FLOAT,
         |  col_DOUBLE    DOUBLE,
         |  col_DECIMAL   DECIMAL(10,2),
         |  col_TIMESTAMP TIMESTAMP,
         |  col_DATE      DATE,
         |  col_INTERVAL  STRING,
         |  col_STRING    STRING,
         |  col_VARCHAR   VARCHAR(255),
         |  col_CHAR      CHAR(10),
         |  col_BOOLEAN   BOOLEAN,
         |  col_BINARY    BINARY
         |) STORED AS ORC LOCATION '$targetPath'""".stripMargin)

    hiveContext.sql("show tables").collect().foreach(println)
    sc.stop()
  }
}
spark test