SparkSql
pom
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>test3-24</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- Compile-time annotation processing (@Data, @AllArgsConstructor, ...) -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
        </dependency>

        <!-- Scala 2.12 toolchain, matching the _2.12 Spark artifacts below -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.12.4</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-compiler</artifactId>
            <version>2.12.4</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-reflect</artifactId>
            <version>2.12.4</version>
        </dependency>

        <!-- NOTE(review): log4j 1.x is end-of-life and has known CVEs;
             consider migrating to log4j2 or logback when convenient. -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.12</version>
        </dependency>

        <!-- Spark 3.0.0 stack (core + SQL + Hive), all built for Scala 2.12 -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>3.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.12</artifactId>
            <version>3.0.0</version>
        </dependency>

        <!-- JDBC driver, only needed at runtime.
             NOTE(review): 5.1.6 is very old; 5.1.49 is the last 5.1.x release. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.6</version>
            <scope>runtime</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compiles Scala sources in main and test scopes.
                 NOTE(review): org.scala-tools:maven-scala-plugin is long deprecated;
                 its successor is net.alchim31.maven:scala-maven-plugin. -->
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
bean
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Bean mapping one line of Date.txt, which classifies each calendar day by
 * month, week, quarter, etc.
 *
 * Field layout per line (comma-separated):
 * date, year-month, year, month, day, day-of-week, week number, quarter,
 * ten-day period, half-month.
 *
 * NOTE(review): field ORDER is load-bearing — the Lombok {@code @AllArgsConstructor}
 * parameter order must match the column order in Date.txt, and the field names
 * become the Spark SQL column names via {@code createDataFrame(..., Date.class)}.
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class Date {
    // NOTE(review): "data" looks like a typo for "date", but renaming it would
    // change the Lombok-generated getter (getData -> getDate) and the derived
    // Spark column name, so it is left as-is.
    private String data;
    private String year_month;
    private String year;
    private String month;
    private String day;
    private String week;
    // week number within the year (presumably; verify against Date.txt producer)
    private String week_th;
    private String quarter;
    // ten-day period of the month (Chinese calendar convention "旬")
    private String a_period_of_ten_days;
    // half-month marker ("半月")
    private String meniscus;
}
1 import lombok.AllArgsConstructor; 2 import lombok.Data; 3 import lombok.NoArgsConstructor; 4 5 @AllArgsConstructor 6 @NoArgsConstructor 7 @Data 8 public class Details { 9 //订单号,行号,货品,数量,价格,金额 10 private String orderNo; 11 private String rowkey; 12 private String shop; 13 private String num; 14 private String price; 15 private String Amount; 16 }
test
1 import org.apache.spark.SparkConf; 2 import org.apache.spark.SparkContext; 3 import org.apache.spark.api.java.JavaRDD; 4 import org.apache.spark.api.java.function.Function; 5 import org.apache.spark.rdd.RDD; 6 import org.apache.spark.sql.Dataset; 7 import org.apache.spark.sql.Row; 8 import org.apache.spark.sql.SparkSession; 9 10 11 public class SparkSql { 12 13 public static void main(String[] args) throws Exception { 14 //spark conf 15 SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("app"); 16 //spark context 17 SparkContext sparkContext = new SparkContext(conf); 18 //spark session 19 SparkSession session = SparkSession.builder().config(conf).getOrCreate(); 20 SparkSession sparkSession = SparkSession.builder().appName("name").master("local[*]").getOrCreate(); 21 //from windows 22 Dataset<String> dataset = sparkSession.read().textFile("C:\\Date.txt"); 23 //javaRDD<Date> 24 JavaRDD<Date> datemap = dataset.toJavaRDD().map(new Function<String, Date>() { 25 @Override 26 public Date call(String v1) throws Exception { 27 String[] split = v1.split(","); 28 return new Date(split[0],split[1],split[2],split[3],split[4],split[5],split[6],split[7],split[8],split[9]); 29 } 30 }); 31 //from hdfs 32 RDD<String> stringRDD = sparkContext.textFile("hdfs://hadoop106:8020/StockDetail.txt",1); 33 JavaRDD<String> stringJavaRDD = stringRDD.toJavaRDD(); 34 //javaRDD<Details> 35 JavaRDD<Details> map = stringJavaRDD.map(new Function<String, Details>() { 36 @Override 37 public Details call(String s) throws Exception { 38 String[] split = s.split(","); 39 return new Details(split[0], split[1], split[2], split[3], split[4],split[5]); 40 41 } 42 }); 43 44 Dataset<Row> dateDataFrame = session.createDataFrame(datemap, Date.class); 45 Dataset<Row> dataFrame = session.createDataFrame(map, Details.class); 46 47 dateDataFrame.createTempView("date"); 48 dataFrame.createTempView("detail"); 49 50 Dataset<Row> dateSql = sparkSession.sql("select * from date"); 51 Dataset<Row> sql = 
session.sql("select * from detail"); 52 53 dateSql.show(); 54 sql.show(); 55 56 57 } 58 }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架