Spark SQL Dataset
java
1 /**
2 *2.0之后使用sparksession即可,不需要再去创建sqlcontext
3 *@author Tele
4 *
5 */
6 public class Demo {
7 private static SparkConf conf = new SparkConf().setAppName("dataframedemo").setMaster("local");
8 private static JavaSparkContext jsc = new JavaSparkContext(conf);
9
10 private static SparkSession session = new SparkSession(jsc.sc());
11
12 //创建sparksession(可以不创建jsc)
13 // private static SparkSession session = SparkSession.builder().appName("asd").master("local").getOrCreate();
14
15
16 // private static SQLContext sqlContext = new SQLContext(session);
17
18
19 public static void main(String[] args) {
20
21 //在java中返回是dataset,在scala中返回的是dataframe
22 // sqlContext.read().json("xx")
23
24 Dataset<Row> dataset = session.read().json("./src/main/java/cn/tele/spark_sql/dataframe/students.json");
25 //输出全部数据
26 dataset.show();
27
28
29 //输出某一列
30 dataset.select("id").show();
31 dataset.select(dataset.col("id")).show();
32
33
34 //判断id>2
35 dataset.select(dataset.col("id").gt(2)).show();
36
37 //判断id>=2
38 dataset.select(dataset.col("id").geq(2)).show();
39
40 //将age += 100
41 dataset.select(dataset.col("age").plus(100)).show();
42
43 //输出元数据
44 dataset.printSchema();
45
46
47 jsc.close();
48
49
50 }
51 }
scala
/** Demonstrates basic Spark SQL DataFrame operations from Scala: loading a
  * JSON file, projecting columns, evaluating column expressions and printing
  * the schema.
  */
object Demo {

  /** Runs the demo against a local Spark master.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Local import keeps this block self-contained; SparkSession replaces the
    // legacy SQLContext entry point since Spark 2.0.
    import org.apache.spark.sql.SparkSession

    val conf = new SparkConf().setAppName("demo").setMaster("local")
    val spark = SparkSession.builder().config(conf).getOrCreate()

    try {
      val students = spark.read.json("./src/main/scala/cn/tele/spark_sql/dataframe/students.json")

      // Single column via a Column reference.
      students.select(students.col("id")).show()

      // id + 1, using the named method and the symbolic operator.
      students.select(students.col("id").plus(1)).show()
      students.select(students.col("id") + 1).show()

      // Several columns by name.
      students.select("id", "name").show()

      // Evaluate id >= 2 for each row (projects a boolean column). The bound is
      // a numeric literal rather than the string "2", so the comparison does
      // not rely on implicit string-to-number coercion.
      students.select(students.col("id").geq(2)).show()

      students.printSchema()
    } finally {
      // Stop the session (and its SparkContext) even if a step above fails.
      spark.stop()
    }
  }
}