Getting started with Spark SQL

The following Spark 1.x demo loads a comma-separated student/course/score file from HDFS, converts it into a DataFrame, and runs a few SQL queries against a registered temp table:
package cn.my.sparksql

import cn.my.sparkStream.LogLevel
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * Created by lq on 2017/8/10.
 */
object SqlDemo {
  def main(args: Array[String]): Unit = {
    LogLevel.setStreamingLogLevels()
    val conf = new SparkConf().setAppName("sql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    System.setProperty("user.name", "hadoop")

    // Read the raw text file from HDFS and map each comma-separated line to a Student
    val personRdd = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
      .map(line => {
        val fields = line.split(",")
        Student(fields(0), fields(1), fields(2).toInt)
      })

    // Bring in the implicits needed for rdd.toDF(), then register a temp table for SQL
    import sqlContext.implicits._
    val personDf = personRdd.toDF()
    personDf.registerTempTable("student")

    // 1. Query all of Lily's scores
    // 2. Query Lily's total score
    // 3. Query the students whose score is higher than 90
    sqlContext.sql("select * from student where name=\"Lily\"").show()
    sqlContext.sql("select sum(score) from student where name=\"Lily\"").show()
    sqlContext.sql("select * from student where score > 90").show()
  }
}

// Case classes used with toDF() must be defined outside the method that uses them
case class Student(name: String, course: String, score: Int)
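The original post does not show the contents of studentCourse.dat, but the parsing logic above implies one record per line in name,course,score order. A file like the following (hypothetical values) would satisfy it:

    Lily,math,85
    Lily,english,95
    Tom,math,72

Note that registerTempTable is the Spark 1.x SQLContext API; in Spark 2.x it was deprecated in favor of createOrReplaceTempView on the SparkSession.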
How to use Spark SQL from the command line
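The original section ends at this heading, so what follows is only a sketch. Assuming the same Spark 1.x setup and HDFS path as the demo above, the quickest command-line route is spark-shell, which pre-creates sc and sqlContext so the same queries can be run interactively:

    // Start the REPL first: $SPARK_HOME/bin/spark-shell --master local[2]
    // Hypothetical session; the HDFS path is reused from the compiled demo above.
    case class Student(name: String, course: String, score: Int)

    import sqlContext.implicits._
    val studentDf = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
      .map(_.split(","))
      .map(f => Student(f(0), f(1), f(2).toInt))
      .toDF()

    studentDf.registerTempTable("student")
    sqlContext.sql("select * from student where score > 90").show()

Spark distributions also ship a bin/spark-sql script that accepts SQL statements directly, but it runs against tables in the Hive metastore rather than ad-hoc RDDs, so the spark-shell approach above matches this demo more closely.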
Master computing, and you can go anywhere in the world without fear.