Getting started with Spark SQL

The following Spark 1.x demo loads a comma-separated student/course/score file from HDFS, converts it into a DataFrame, and runs a few SQL queries against a registered temp table:
package cn.my.sparksql

import cn.my.sparkStream.LogLevel
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * Created by lq on 2017/8/10.
 */
object SqlDemo {
  def main(args: Array[String]): Unit = {
    LogLevel.setStreamingLogLevels()
    val conf = new SparkConf().setAppName("sql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    System.setProperty("user.name", "hadoop")

    // Read the raw text file from HDFS and map each comma-separated line to a Student
    val personRdd = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
      .map(line => {
        val fields = line.split(",")
        Student(fields(0), fields(1), fields(2).toInt)
      })

    // Bring in the implicits needed for rdd.toDF(), then register a temp table for SQL
    import sqlContext.implicits._
    val personDf = personRdd.toDF()
    personDf.registerTempTable("student")

    // 1. Query all of Lily's scores
    // 2. Query Lily's total score
    // 3. Query the students whose score is higher than 90
    sqlContext.sql("select * from student where name=\"Lily\"").show()
    sqlContext.sql("select sum(score) from student where name=\"Lily\"").show()
    sqlContext.sql("select * from student where score > 90").show()
  }
}

// Case classes used with toDF() must be defined outside the method that uses them
case class Student(name: String, course: String, score: Int)
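The original post does not show the contents of studentCourse.dat, but the parsing logic above implies one record per line in name,course,score order. A file like the following (hypothetical values) would satisfy it:

    Lily,math,85
    Lily,english,95
    Tom,math,72

Note that registerTempTable is the Spark 1.x SQLContext API; in Spark 2.x it was deprecated in favor of createOrReplaceTempView on the SparkSession.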
How to use Spark SQL from the command line
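The original section ends at this heading, so what follows is only a sketch. Assuming the same Spark 1.x setup and HDFS path as the demo above, the quickest command-line route is spark-shell, which pre-creates sc and sqlContext so the same queries can be run interactively:

    // Start the REPL first: $SPARK_HOME/bin/spark-shell --master local[2]
    // Hypothetical session; the HDFS path is reused from the compiled demo above.
    case class Student(name: String, course: String, score: Int)

    import sqlContext.implicits._
    val studentDf = sc.textFile("hdfs://mini1:9000/spark/student/studentCourse.dat")
      .map(_.split(","))
      .map(f => Student(f(0), f(1), f(2).toInt))
      .toDF()

    studentDf.registerTempTable("student")
    sqlContext.sql("select * from student where score > 90").show()

Spark distributions also ship a bin/spark-sql script that accepts SQL statements directly, but it runs against tables in the Hive metastore rather than ad-hoc RDDs, so the spark-shell approach above matches this demo more closely.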
Master computing, and you can go anywhere in the world without fear.