Flink 离线(批处理)WordCount

pom文件

  1 <?xml version="1.0" encoding="UTF-8"?>
  2 <project xmlns="http://maven.apache.org/POM/4.0.0"
  3          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5     <modelVersion>4.0.0</modelVersion>
  6 
  7     <groupId>com.test</groupId>
  8     <artifactId>test-flink</artifactId>
  9     <version>1.0</version>
 10 
 11     <properties>
 12         <project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
 13         <flink.version>1.10.0</flink.version>
 14         <scala.version>2.12.6</scala.version>
 15         <scala.binary.version>2.12</scala.binary.version>
 16     </properties>
 17     <dependencies>
 18         <dependency>
 19             <groupId>org.apache.flink</groupId>
 20             <artifactId>flink-scala_${scala.binary.version}</artifactId>
 21             <version>${flink.version}</version>
 22         </dependency>
 23         <dependency>
 24             <groupId>org.apache.flink</groupId>
 25             <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
 26             <version>${flink.version}</version>
 27         </dependency>
 28         <dependency>
 29             <groupId>org.apache.flink</groupId>
 30             <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
 31             <version>${flink.version}</version>
 32         </dependency>
 33         <dependency>
 34             <groupId>org.scala-lang</groupId>
 35             <artifactId>scala-library</artifactId>
 36             <version>${scala.version}</version>
 37         </dependency>
 38         <dependency>
 39             <groupId>org.slf4j</groupId>
 40             <artifactId>slf4j-log4j12</artifactId>
 41             <version>1.7.7</version>
 42         </dependency>
 43         <dependency>
 44             <groupId>log4j</groupId>
 45             <artifactId>log4j</artifactId>
 46             <version>1.2.17</version>
 47         </dependency>
 48         <dependency>
 49             <groupId>com.fasterxml.jackson.core</groupId>
 50             <artifactId>jackson-databind</artifactId>
 51             <version>2.10.2</version>
 52         </dependency>
 53         <!--<dependency>
 54             <groupId>net.sf.json-lib</groupId>
 55             <artifactId>json-lib</artifactId>
 56             <version>2.4</version>
 57         </dependency>-->
 58         <dependency>
 59             <groupId>org.apache.hbase</groupId>
 60             <artifactId>hbase-client</artifactId>
 61             <version>2.2.0</version>
 62         </dependency>
 63         <dependency>
 64             <groupId>org.elasticsearch</groupId>
 65             <artifactId>elasticsearch</artifactId>
 66             <version>7.3.1</version>
 67         </dependency>
 68         <dependency>
 69             <groupId>org.elasticsearch.client</groupId>
 70             <artifactId>elasticsearch-rest-high-level-client</artifactId>
 71             <version>7.3.1</version>
 72         </dependency>
 73         <dependency>
 74             <groupId>org.eclipse.jetty</groupId>
 75             <artifactId>jetty-util-ajax</artifactId>
 76             <version>9.4.26.v20200117</version>
 77         </dependency>
 78         <dependency>
 79             <groupId>mysql</groupId>
 80             <artifactId>mysql-connector-java</artifactId>
 81             <version>5.1.39</version>
 82         </dependency>
 83 
 84     </dependencies>
 85     <build>
 86         <plugins>
 87             <!-- 该插件用于将 Scala 代码编译成 class 文件 -->
 88             <plugin>
 89                 <groupId>net.alchim31.maven</groupId>
 90                 <artifactId>scala-maven-plugin</artifactId>
 91                 <version>3.4.6</version>
 92                 <executions>
 93                     <execution>
 94                         <!-- 声明绑定到 maven 的 compile 阶段 -->
 95                         <goals>
 96                             <goal>testCompile</goal>
 97                         </goals>
 98                     </execution>
 99                 </executions>
100             </plugin>
101 
102             <plugin>
103                 <groupId>org.apache.maven.plugins</groupId>
104                 <artifactId>maven-assembly-plugin</artifactId>
105                 <version>3.0.0</version>
106                 <configuration>
107                     <descriptorRefs>
108                         <descriptorRef>jar-with-dependencies</descriptorRef>
109                     </descriptorRefs>
110                 </configuration>
111                 <executions>
112                     <execution>
113                         <id>make-assembly</id>
114                         <phase>package</phase>
115                         <goals>
116                             <goal>single</goal>
117                         </goals>
118                     </execution>
119                 </executions>
120             </plugin>
121         </plugins>
122     </build>
123 </project>
View Code

 

代码

 1 import org.apache.flink.api.scala._
 2 object FlinkDemo01_DataSet_wc {
 3     def main(args: Array[String]): Unit = {
 4         //1 创建执行环境
 5         val env = ExecutionEnvironment.getExecutionEnvironment
 6         //2 读取数据
 7         val ds: DataSet[String] = env.readTextFile("file:///I:\\projectImplement\\dataWareHouse\\test-es\\data\\word.log")
 8         //3 执行
 9         val words: DataSet[String] = ds.flatMap(_.split(" "))
10         val pairs = words.map((_,1))
11         val keyAndNums: GroupedDataSet[(String, Int)] = pairs.groupBy(0)
12         val result: AggregateDataSet[(String, Int)] = keyAndNums.sum(1)
13         result.print()
14     }
15 }
View Code

 

posted on 2020-06-22 01:12  天天背单词  阅读(202)  评论(0)  编辑  收藏  举报

导航