FlinkSql指定时间语义

FlinkSql指定时间语义

FlinkSql在建表时指定时间语义,根据建表方式和时间语义的不同进行记录

1.从DataStream流建表+process time语义

因为是process time所以不需要指定watermark的延迟生成时间,故可以直接在创建table对象时最后一列增加一个字段即可

  • 举例
  /**
   * Reads sensor records from a text file, converts the resulting DataStream
   * into a Table and appends a processing-time attribute as the last column.
   */
  public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String sensorFile = "D:\\workspace21\\myflink\\src\\main\\resources\\sensors.txt";
        DataStream<String> rawLines = env.readTextFile(sensorFile);

        // Each line is split on "," into: id, timestamp, temperature.
        DataStream<SensorReading> readings = rawLines.map(line -> {
            String[] cols = line.split(",");
            Double temperature = Double.valueOf(cols[2]);
            Long timestamp = Long.valueOf(cols[1]);
            return new SensorReading(cols[0], temperature, timestamp);
        });

        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // "pt" is the extra processing-time column; its name can be chosen freely.
        String fieldExpr = "id,temperature as temp,timestamp,pt.proctime";
        Table sensorTable = tableEnv.fromDataStream(readings, fieldExpr);
        sensorTable.printSchema();

        DataStream<Row> rows = tableEnv.toAppendStream(sensorTable, Row.class);
        rows.print("api");

        env.execute();
    }
  • 此时打印表的Schema可以看到表最后增加了一列
root
 |-- id: STRING
 |-- temp: DOUBLE
 |-- timestamp: BIGINT
 |-- pt: TIMESTAMP(3) *PROCTIME*

2.使用connect+format+schema建表+process time语义

  • 举例
  /**
   * Registers a CSV file as a table through the connect + format + schema
   * descriptor API, with an extra "pt" column intended to carry processing time,
   * then prints the schema and the rows.
   */
  public static void main(String[] args) throws Exception {
        // A StreamExecutionEnvironment is required for any streaming job.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The short create() overload uses the default planner, which differs between versions.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Connect to the data source and register it as a table.
        String filePath = "D:\\workspace21\\myflink\\src\\main\\resources\\sensors.txt";
        tableEnv.connect(new FileSystem().path(filePath))
                // withFormat tells Flink how to parse each incoming record, e.g. comma-separated CSV.
                .withFormat(new Csv())
                // withSchema declares the table structure; fields must follow the order of the parsed data.
                .withSchema(new Schema()
                        .field("id", DataTypes.STRING())
                        .field("time", DataTypes.BIGINT())
                        .field("temp", DataTypes.DOUBLE())
                        // The processing-time column is simply appended as the last field.
                        .field("pt", DataTypes.TIMESTAMP(3))
                        // NOTE(review): the descriptor API method that marks the previous field as
                        // processing time is Schema#proctime() (not processTime()). It is left
                        // disabled as in the original; confirm that the filesystem/CSV connector of
                        // your Flink version supports it before enabling.
                        // .proctime()
                )
                .createTemporaryTable("inputTable");
        Table inputTable = tableEnv.from("inputTable");
        inputTable.printSchema();
        // Print the converted stream; without a sink the example produces no visible output.
        tableEnv.toAppendStream(inputTable, Row.class).print("inputTable");
        env.execute();
    }

3.使用DDL方式建表+process time语义

// DDL for table "dataTable" backed by a CSV file; "pt" is a computed column
// defined as PROCTIME(), i.e. the processing-time attribute.
String sinkDDL = String.join("",
        "create table dataTable (",
        " id varchar(20) not null, ",
        " ts bigint, ",
        " temperature double, ",
        " pt AS PROCTIME() ",
        ") with (",
        " 'connector.type' = 'filesystem', ",
        " 'connector.path' = '/sensor.txt', ",
        " 'format.type' = 'csv')");
tableEnv.sqlUpdate(sinkDDL);

4.从DataStream流建表+event time语义

事件时间语义 和watermark在生成table之前就定义了,建表时使用.rowtime

  • 举例
public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        //指定时间语义
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        DataStreamSource<String> dataStreamSource = env.readTextFile("D:\\workspace21\\myflink\\src\\main\\resources\\sensors.txt");
        DataStream<SensorReading> mapDataStream = dataStreamSource.map(el -> {
            String[] split = el.split(",");
            return new SensorReading(split[0], Double.valueOf(split[2]), Long.valueOf(split[1]));
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<SensorReading>(Time.seconds(3)) {
            @Override
            public long extractTimestamp(SensorReading element) {
                return element.getTimestamp() * 1000L;
            }
        });
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        //event time  使用 rt.rowtime 声明 这个时候rt已经不是原来的timestamp的LONG类型的时间戳了 而是TIMESTAMP(3)
       `Table tableApi = tableEnv.fromDataStream(mapDataStream, "id,temperature as tp,rt.rowtime,timestamp as ts");`
        tableApi.printSchema();
        tableEnv.toAppendStream(tableApi, Row.class).print("api");
        env.execute();
    }
  • schema 如下:
root
 |-- id: STRING
 |-- tp: DOUBLE
 |-- rt: TIMESTAMP(3) *ROWTIME*
 |-- ts: BIGINT
  • 数据如下:
api> sensor_1,37.9,2021-01-31 11:35:07.0,1612092907
api> sensor_2,50.1,2021-01-31 11:34:15.0,1612092855
api> sensor_3,23.7,2021-01-31 11:34:58.0,1612092898
api> sensor_4,15.3,2021-01-31 11:35:17.0,1612092917

5.使用connect+format+schema建表+eventtime语义

  • 举例
  /**
   * Registers a CSV file as a table through the connect + format + schema
   * descriptor API with an event-time (rowtime) attribute derived from the
   * "ts" field, then prints the schema and the rows.
   */
  public static void main(String[] args) throws Exception {
        // A StreamExecutionEnvironment is required for any streaming job.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // The short create() overload uses the default planner, which differs between versions.
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Connect to the data source and register it as a table.
        String filePath = "D:\\workspace21\\myflink\\src\\main\\resources\\sensors.txt";
        tableEnv.connect(new FileSystem().path(filePath))
                // withFormat tells Flink how to parse each incoming record, e.g. comma-separated CSV.
                .withFormat(new Csv())
                // withSchema declares the table structure; fields must follow the order of the parsed data.
                .withSchema(new Schema()
                        .field("id", DataTypes.STRING())
                        .field("ts", DataTypes.BIGINT())
                        .field("temp", DataTypes.DOUBLE())
                        // rowtime() applies to the previously defined field, so a dedicated
                        // TIMESTAMP(3) column is declared first; otherwise "temp" would be
                        // marked as the event-time attribute.
                        .field("rt", DataTypes.TIMESTAMP(3))
                        .rowtime(new Rowtime()
                                // Extract the event timestamp from the "ts" field.
                                // NOTE(review): a BIGINT source field is presumably read as epoch
                                // milliseconds, while the sample data looks like epoch seconds - confirm.
                                .timestampsFromField("ts")
                                // Watermarks lag the maximum seen timestamp by 1000 ms.
                                .watermarksPeriodicBounded(1000)
                        )
                )
                .createTemporaryTable("inputTable");
        Table inputTable = tableEnv.from("inputTable");
        inputTable.printSchema();
        // Print the converted stream; without a sink the example produces no visible output.
        tableEnv.toAppendStream(inputTable, Row.class).print("inputTable");
        env.execute();
    }

6.使用DDL方式建表+event time语义

  • 举例
// DDL for table "dataTable" backed by a CSV file; "rt" is computed from "ts"
// and the WATERMARK clause declares it as the event-time attribute with a
// one-second delay.
String sinkDDL = String.join("",
        "create table dataTable (",
        " id varchar(20) not null, ",
        " ts bigint, ",
        " temperature double, ",
        " rt AS TO_TIMESTAMP( FROM_UNIXTIME(ts) ), ",
        " watermark for rt as rt - interval '1' second",
        ") with (",
        " 'connector.type' = 'filesystem', ",
        " 'connector.path' = '/sensor.txt', ",
        " 'format.type' = 'csv')");
tableEnv.sqlUpdate(sinkDDL);
posted @ 2021-04-11 18:56  从不吃鱼的猫  阅读(1194)  评论(0)  编辑  收藏  举报