yangyang12138

导航

flink(一)

1.概述

 Apache Flink 是近年来越来越流行的一款开源大数据计算引擎,它同时支持了批处理和流处理.

2.安装
下载:https://mirror.bit.edu.cn/apache/flink/flink-1.10.0/flink-1.10.0-bin-scala_2.12.tgz
解压到指定目录,
然后在 conf/flink-conf.yaml 中配置 env.java.home(指向本机 JDK 的安装目录)。
 
切换到bin目录下
./start-cluster.sh 启动flink
 
启动后在浏览器中访问 http://localhost:8081 查看 Web 界面。
 
3.demo
首先启动一个 socket 服务端,用于向 Flink 作业发送数据:
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.*;

/**
 * Minimal TCP test server that feeds line-oriented demo data to a client.
 *
 * <p>The server listens on port {@value #PORT}. For every client that connects it
 * writes the lines {@code "0 0"}, {@code "1 1"}, ... {@code "99 99"} (each terminated
 * by {@code '\n'}), pausing between lines, then closes the connection and waits for
 * the next client. Clients are served one at a time.
 */
public class SocketMain {
    /** TCP port the demo server listens on. */
    private static final int PORT = 55901;

    /** Number of lines sent to each client. */
    private static final int LINES_PER_CLIENT = 100;

    /** Pause between two consecutive lines, in milliseconds. */
    private static final long SEND_INTERVAL_MILLIS = 500L;

    // Retained for source compatibility with the public constructor; main() does not use it.
    private final Socket socket;

    /**
     * Creates a holder for an already-connected socket.
     *
     * @param socket the client socket to keep a reference to
     */
    public SocketMain(Socket socket) {
        this.socket = socket;
    }

    /**
     * Runs the server loop until the process is terminated.
     *
     * @param args ignored
     * @throws Exception if the listening socket cannot be opened or accepting a client
     *     fails, or if the thread is interrupted while pausing between lines
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources guarantees the listening port is released on any exit path.
        try (ServerSocket servSock = new ServerSocket(PORT)) {
            while (true) {
                // The client socket is closed even when sending fails part-way through.
                try (Socket clntSock = servSock.accept()) {
                    SocketAddress clientAddress = clntSock.getRemoteSocketAddress();
                    System.out.println("Handling client at " + clientAddress);
                    sendLines(clntSock);
                }
            }
        }
    }

    /**
     * Writes the demo lines to one client.
     *
     * <p>A write failure (typically the client disconnecting early) is reported on
     * standard error and ends this client's session only, so the server can keep
     * accepting new clients.
     */
    private static void sendLines(Socket clntSock) throws InterruptedException {
        try {
            OutputStream out = clntSock.getOutputStream();
            for (int i = 0; i < LINES_PER_CLIENT; i++) {
                // Explicit charset so the bytes on the wire do not depend on the platform default.
                byte[] data = (i + " " + i + "\n").getBytes(java.nio.charset.StandardCharsets.UTF_8);
                out.write(data, 0, data.length);
                Thread.sleep(SEND_INTERVAL_MILLIS);

                System.out.println(i);
            }
        } catch (IOException e) {
            System.err.println("Connection to " + clntSock.getRemoteSocketAddress() + " failed: " + e);
        }
    }
}

然后运行 Flink 作业(从上面的 socket 读取数据并统计单词出现次数):

package demo;

import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Streaming word count over a text socket.
 *
 * <p>Connects to {@code localhost:55901}, splits every received line on whitespace,
 * and counts the occurrences of each word in a sliding window of 5 seconds that
 * advances every second. The per-window counts are printed to standard output.
 */
public class Main {

    private static final Logger LOG = LoggerFactory.getLogger(Main.class);

    /**
     * Builds and executes the streaming job.
     *
     * @param args ignored
     * @throws Exception if the job cannot be built or its execution fails
     */
    public static void main(String[] args) throws Exception {

        // get the execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // get input data by connecting to the socket; records are delimited by '\n'
        DataStream<String> text = env.socketTextStream("localhost", 55901, "\n");

        DataStream<WordWithCount> windowCounts = text
                // emit one (word, 1) record per whitespace-separated token
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String s, org.apache.flink.util.Collector<WordWithCount> collector) throws Exception {
                        for (String word : s.split("\\s")) {
                            collector.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                // field-name keys require WordWithCount to be a valid Flink POJO
                .keyBy("word")
                // window size 5s, slide 1s
                .timeWindow(Time.seconds(5), Time.seconds(1))
                // sum the counts of records sharing the same word
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        // print the results with a single thread, rather than in parallel
        windowCounts.print().setParallelism(1);

        env.execute("Socket Window WordCount");
    }

    /**
     * A word together with its occurrence count.
     *
     * <p>Kept as a Flink POJO (public static class, public fields, public no-argument
     * constructor) so that the field name {@code "word"} can be used as a key.
     */
    public static class WordWithCount {

        public String word;
        public long count;

        /** No-argument constructor required for Flink to treat this class as a POJO. */
        public WordWithCount() {
        }

        /**
         * Creates a record and logs it at INFO level.
         *
         * @param word the word
         * @param count the number of occurrences
         */
        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;

            // Same text as toString(), without calling an overridable method from the constructor.
            LOG.info("{} : {}", word, count);
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}

 

posted on 2020-03-26 01:05  杨杨09265  阅读(174)  评论(0)  编辑  收藏  举报