Spark SQL 使用 Hive 作为数据源
根据官方文档的说法,需要把 hive-site.xml、core-site.xml、hdfs-site.xml 拷贝到 Spark 的 conf 目录下,并保证 MySQL(Hive 的元数据库)已经启动
java
1 public class Demo {
2 private static SparkSession session = SparkSession.builder().appName("demo").enableHiveSupport()
3 .config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate();
4
5 public static void main(String[] args) {
6 session.sql("drop table if exists students_info");
7 session.sql("create table if not exists students_info(name string,age int) "
8 + "row format delimited fields terminated by '\t' \r\n");
9
10 // 将数据导入学生信息表
11 session.sql(
12 "load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info");
13
14 session.sql("drop table if exists students_score");
15 session.sql("create table if not exists students_score(name string,score int) \r\n"
16 + "row format delimited fields terminated by '\t' \r\n");
17
18 // 将数据导入学生成绩表
19 session.sql(
20 "load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score");
21
22 // 查询
23 Dataset<Row> dataset = session.sql(
24 "select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score>80");
25
26 // 将dataset中的数据保存到hive中
27 session.sql("drop table if exists students_result");
28 dataset.write().saveAsTable("students_result");
29
30 // 将hive中的表转成dataset,查看数据是否成功保存
31 Dataset<Row> table = session.table("students_result");
32 table.show();
33
34 session.stop();
35
36 }
37 }
scala
/**
 * Scala version of the Spark SQL + Hive demo.
 *
 * Builds the two Hive tables, loads the local data files, joins info with
 * scores (keeping scores above 90), persists the result to the Hive table
 * `students_result`, and reads it back to verify the save.
 *
 * Requires hive-site.xml / core-site.xml / hdfs-site.xml in Spark's conf
 * directory and a running Hive metastore.
 */
object Demo {
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().appName("demo").enableHiveSupport().config("spark.sql.warehouse.dir", "/user/hive/warehouse").getOrCreate()

    // FIX: the original only stopped the session on the happy path; any failing
    // SQL statement leaked the SparkSession. try/finally guarantees cleanup.
    try {
      // Recreate the student-info table so reruns are repeatable, then load it.
      session.sql("drop table if exists students_info")
      session.sql("create table if not exists students_info(name string,age int) \r\n row format delimited fields terminated by '\t'")

      session.sql("load data local inpath '/opt/module/spark-test/data/student_infos.txt' into table default.students_info")

      // Recreate and load the student-score table.
      session.sql("drop table if exists students_score")
      session.sql("create table if not exists students_score(name string,score int) \r\n row format delimited fields terminated by '\t'")

      session.sql("load data local inpath '/opt/module/spark-test/data/student_scores.txt' into table default.students_score")

      // Join info with scores (score > 90) and save the result into Hive.
      // The result table is dropped first because saveAsTable's default
      // ErrorIfExists mode would fail on a rerun.
      session.sql("drop table if exists students_result")
      session.sql("select s1.name,s1.age,s2.score from students_info s1 join students_score s2 on s1.name=s2.name where s2.score >90").write.saveAsTable("students_result")

      // Read the saved table back and show it to confirm persistence.
      val df = session.table("students_result")
      df.show()
    } finally {
      session.stop()
    }
  }
}