SparkSQL — Basic DataFrame API Operations
package com.spark

import org.apache.spark.sql.SparkSession

/**
 * Basic DataFrame API operations, demonstrated against a JSON input file.
 *
 * Usage: DataFrameAPP1 [inputPath]
 *
 * When no argument is supplied, falls back to the original hard-coded
 * Windows path for backward compatibility.
 */
object DataFrameAPP1 {

  def main(args: Array[String]): Unit = {
    // Allow the input path to be overridden on the command line;
    // default preserves the original behavior.
    val path = if (args.nonEmpty) args(0) else "E:\\data\\infos.txt"

    val spark = SparkSession.builder()
      .appName("DataFrameApp")
      .master("local[2]")
      .getOrCreate()

    try {
      // NOTE(review): the default path ends in .txt but is read with the
      // JSON source — assumes one JSON object per line; confirm the data file.
      val peopleDF = spark.read.format("json").load(path)

      // Print the schema Spark inferred from the JSON input.
      peopleDF.printSchema()

      // Show the first 20 rows (Spark's default for show()).
      peopleDF.show()

      // SELECT name FROM table
      peopleDF.select("name").show()

      // SELECT name, age + 10 AS age2 FROM table
      peopleDF.select(peopleDF.col("name"), (peopleDF.col("age") + 10).as("age2")).show()

      // SELECT * FROM table WHERE age > 19
      peopleDF.filter(peopleDF.col("age") > 19).show()

      // SELECT age, COUNT(1) FROM table GROUP BY age
      peopleDF.groupBy("age").count().show()
    } finally {
      // Always release the SparkSession, even if a job above fails
      // (original code leaked the session on any exception).
      spark.stop()
    }
  }
}