Spark SQL：使用反射的方式将 RDD 转换成 Dataset/DataFrame

java

 1 public class ReflectionDemo {
 2     private static SparkConf conf = new SparkConf().setAppName("reflectdemo").setMaster("local");
 3     private static JavaSparkContext jsc = new JavaSparkContext(conf);
 4 
 5     private static SparkSession session = new SparkSession(jsc.sc());
 6 
 7     public static void main(String[] args) throws AnalysisException {
 8 
 9         // rdd
10         JavaRDD<String> rdd = jsc.textFile("./src/main/java/cn/tele/spark_sql/rdd2dataset/students.txt");
11 
12         // 封装rdd
13         JavaRDD<Student> mapRdd = rdd.map(new Function<String, Student>() {
14 
15             private static final long serialVersionUID = 1L;
16 
17             @Override
18             public Student call(String v1) throws Exception {
19                 String[] fields = v1.split(",");
20                 Student student = new Student(Integer.valueOf(fields[0]), fields[1], Integer.valueOf(fields[2]));
21                 return student;
22             }
23         });
24 
25         // 通过反射的方式进行转换
26         Dataset<Row> dataset = session.createDataFrame(mapRdd, Student.class);
27         
28         // 注册为临时信息表
29         // dataset.registerTempTable("students");
30 
31         dataset.createOrReplaceTempView("studentsView");
32 
33         Dataset<Row> result = session.sql("select * from studentsView where age<=18");
34         // result.show();
35 
36         // 把dataset转换成rdd
37         JavaRDD<Student> javaRDD = result.javaRDD().map(new Function<Row, Student>() {
38 
39             private static final long serialVersionUID = 1L;
40 
41             @Override
42             public Student call(Row v1) throws Exception {
43                 return new Student(v1.getAs("id"), v1.getAs("name"), v1.getAs("age"));
44             }
45         });
46 
47         javaRDD.foreach(new VoidFunction<Student>() {
48 
49             private static final long serialVersionUID = 1L;
50 
51             @Override
52             public void call(Student t) throws Exception {
53                 System.out.println(t);
54             }
55         });
56         session.stop();
57         jsc.close();
58     }
59 }

scala

 /**
  * A student record; its fields become the columns `id`, `name` and `age`
  * when an RDD of students is converted with `toDF()`.
  *
  * @param id   numeric student id
  * @param name student name
  * @param age  age in years
  */
 case class Student(
   id: Int,
   name: String,
   age: Int
 )

 /**
  * Demonstrates converting an RDD into a DataFrame through reflection:
  * each text line is parsed into a `Student`, the RDD is turned into a
  * DataFrame with `toDF()`, queried with SQL, and the resulting rows are
  * mapped back into an RDD of `Student`.
  */
 object ReflectionDemo {

   /**
    * Runs the demo: load students.txt, keep the students with age <= 18 and print them.
    *
    * @param args unused
    */
   def main(args: Array[String]): Unit = {
     // Local import so that this listing does not depend on an import block.
     import org.apache.spark.sql.SparkSession

     val conf = new SparkConf().setAppName("reflectdemo").setMaster("local")

     // SparkSession replaces the SQLContext entry point, deprecated since Spark 2.0.
     val spark = SparkSession.builder().config(conf).getOrCreate()

     // Implicit conversions that provide `toDF()` on RDDs of case classes
     import spark.implicits._

     try {
       // Source RDD: one comma-separated "id,name,age" record per line
       val rdd = spark.sparkContext.textFile("./src/main/scala/cn/tele/spark_sql/rdd2dataframe/students.txt", 2)

       // Convert the RDD into a DataFrame
       val dataframe = rdd.map { line =>
         val arr = line.split(",")
         Student(arr(0).trim.toInt, arr(1).trim, arr(2).trim.toInt)
       }.toDF()

       // Create a temporary view that SQL can refer to
       dataframe.createOrReplaceTempView("students")

       //  dataframe.show()

       val df = spark.sql("select * from students where age<=18")

       // Back to an RDD; columns are read by name rather than by position
       val newRdd = df.rdd.map(row => Student(row.getAs[Int]("id"), row.getAs[String]("name"), row.getAs[Int]("age")))

       newRdd.foreach(println(_))
     } finally {
       // Release Spark resources even when the job fails.
       spark.stop()
     }
   }

 }

 

posted @ 2019-02-13 16:44  tele  阅读(563)  评论(0)  编辑  收藏  举报