Creating a DataFrame via Java Reflection (Approach 1: Known Data Types)

package sparkSql.方法1创建DataFrame;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

import java.util.List;

/**
 * @description Create a DataFrame from an RDD via reflection.
 * @author: 123.com
 * @create: 2019-02-25 21:07:42
 * Reflection-based creation: this approach requires specifying the JavaBean
 * class that describes the source data.
 **/

public class StuJavaClass {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("java").setMaster("local");
        JavaSparkContext jsc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(jsc);
        JavaRDD<String> javaRDD = jsc.textFile("C:\\Program Files\\feiq\\Recv Files\\day11-Spark SQL\\students.txt");

        // Parse each line ("id,name,age") into a StudentModel bean.
        JavaRDD<StudentModel> javaRDD1 = javaRDD.map(new Function<String, StudentModel>() {
            @Override
            public StudentModel call(String s) throws Exception {
                String[] split = s.split(",");
                StudentModel model = new StudentModel();
                model.setId(Integer.valueOf(split[0]));
                model.setAge(Integer.valueOf(split[2]));
                model.setName(split[1]);
                return model;
            }
        });
        // Build the DataFrame by reflecting over the StudentModel bean class.
        DataFrame dataFrame = sqlContext.createDataFrame(javaRDD1, StudentModel.class);
        // dataFrame.show();
        // Register a temporary table and give it a name.
        dataFrame.registerTempTable("tmp");
        // Query the temporary table.
        DataFrame sql = sqlContext.sql("select * from tmp where age < 18");

        // Convert the DataFrame back to an RDD of Rows.
        JavaRDD<Row> rowJavaRDD = sql.javaRDD();
        // Map each Row back into the bean and print the results.
        // Note: a bean-derived DataFrame orders its columns alphabetically
        // (age, id, name), hence the indices used below.
        JavaRDD<StudentModel> map = rowJavaRDD.map(new Function<Row, StudentModel>() {
            @Override
            public StudentModel call(Row row) throws Exception {
                StudentModel stu = new StudentModel();
                stu.setId(row.getInt(1));
                stu.setAge(row.getInt(0));
                stu.setName(row.getString(2));
                return stu;
            }
        });
        List<StudentModel> collect = map.collect();
        for (StudentModel s : collect) {
            System.out.println(s.getId() + " " + s.getAge() + " " + s.getName());
        }
        // sql.show();
    }
}
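
The StudentModel bean that the reflection call relies on is not included in the post. Below is a minimal sketch of what it could look like, assuming each line of students.txt is a comma-separated "id,name,age" record as implied by the parsing code above; the field names and types are inferred from the setters used there.

package sparkSql.方法1创建DataFrame;

import java.io.Serializable;

// Hypothetical bean matching the setters called above. Reflection-based
// DataFrame creation expects a public JavaBean with getters and setters;
// implementing Serializable lets Spark ship instances between tasks.
public class StudentModel implements Serializable {
    private int id;
    private String name;
    private int age;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public int getAge() { return age; }
    public void setAge(int age) { this.age = age; }
}

If the positional indices (age at 0, id at 1, name at 2, following the alphabetical bean-property order) feel brittle, row.getAs("id") and friends read columns by name instead. The pom.xml for the project follows.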
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.qf</groupId>
    <artifactId>hz1802</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.10.6</scala.version>
        <spark.version>1.6.3</spark.version>
        <hadoop.version>2.6.4</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>

        <dependency>
            <groupId>com.typesafe.akka</groupId>
            <artifactId>akka-actor_2.10</artifactId>
            <version>2.3.14</version>
        </dependency>

        <dependency>
            <groupId>com.typesafe.akka</groupId>
            <artifactId>akka-remote_2.10</artifactId>
            <version>2.3.14</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/spark</sourceDirectory>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass></mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
 