sparksql实践_1
1.需求:
读取test表中的客群编码以及需要执行的获取客群明细的sql语句
执行获取客群明细sql
将客群编码与客群明细写入结果表中
2,.数据准备
drop table sospdm.tmp_yinfei_test; create table sospdm.tmp_yinfei_test ( id string comment '客群id' ,exe_sql string comment '执行sql' ) comment '客群编码+执行sql' row format delimited fields terminated by '\t' ;
drop table sospdm.tmp_yinfei_result; create table sospdm.tmp_yinfei_result ( id string comment '客群id' ,cust_num string comment '会员编码' )comment '结果表,客群编码+客群明细' row format delimited fields terminated by '\t' ;
create table sospdm.tmp_yinfei_cust_num ( cust_num string comment '会员编码' ) comment '客群明细表';
insert overwrite table sospdm.tmp_yinfei_cust_num select 'a' as cust_num from sospdm.dual union all select 'b' as cust_num from sospdm.dual union all select 'c' as cust_num from sospdm.dual ; insert overwrite table sospdm.tmp_yinfei_test select '1' as id,'select cust_num from sospdm.tmp_yinfei_cust_num' from dual union all select '2' as id,'select cust_num from sospdm.tmp_yinfei_cust_num where cust_num in (\'a\',\'b\')' from dual ;
3.编码实现
package spark import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql._ object Execute extends App { case class Person(id: String, cust_num: String) val sparkConf = new SparkConf().setAppName("Read") val session = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() //从表中获取客群以及执行sql语句 val ex: Array[Row] = session.sql("select id,exe_sql from sospdm.tmp_yinfei_test").rdd.collect() import session.implicits._ //结果表数据清除,或者采用分区表 session.sql("truncate table sospdm.tmp_yinfei_result ") //遍历执行sql,与客群id进行笛卡尔关联 for (row <- ex) { val id: String = row.getString(0) val ex_sql: String = row.getString(1) val id_rdd = session.sparkContext.parallelize(List(id)) val cust_num = session.sql(ex_sql).rdd val idCust: RDD[(String, Row)] = id_rdd cartesian (cust_num) val result = idCust.map(person => { Person(person._1.toString, person._2.getString(0)) }).toDF() //创建临时表 result.registerTempTable("tmp_yinfei_cu") //结果数据插入结果表 session.sql("insert into table sospdm.tmp_yinfei_result select id,cust_num from tmp_yinfei_cu") } }
4.结果展示