Flink real-time write to ClickHouse

The job below generates test rows with Flink's datagen connector, buffers them in a 5-second tumbling window with a custom count trigger (flushing on whichever comes first, the window end or the batch size), and writes each batch to ClickHouse through a plain JDBC sink.

package com.otis.clickhouse

import java.util

import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{Trigger, TriggerResult}
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala._
import org.apache.flink.types.Row
import org.apache.flink.util.Collector

object FlinkJob3 {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val settings = EnvironmentSettings.newInstance.useBlinkPlanner.inStreamingMode.build
    val tableEnv = StreamTableEnvironment.create(env, settings)

    val datagen =
      """
        |create table datagen (
        |  name string,
        |  address string,
        |  age int
        |) with (
        |  'connector' = 'datagen',
        |  'rows-per-second' = '5'
        |)
        |""".stripMargin

    tableEnv.executeSql(datagen)
    val table = tableEnv.sqlQuery("select name, address, abs(age) from datagen")
    val stream = tableEnv.toAppendStream[Row](table)

    // Flink's built-in time and count windows each cover only half of the requirement:
    // we want to aggregate over a fixed time window, but also fire as soon as the element count reaches batchSize, so a custom trigger is needed.
    // TODO: known issue - after a count-based fire inside the window, the leftover rows (0 < count < maxCount) keep accumulating and are flushed as a smaller batch when the timer fires at the window end.
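    // one List[Row] is emitted per trigger firing: either the count threshold was hit or the window closed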
    val stream2: DataStream[util.List[Row]] = stream.timeWindowAll(Time.seconds(5))
      .trigger(new MyCountTrigger(20))
      .process(new MyPWFunction)


    val sql = "INSERT INTO user2 (name, address, age) VALUES (?,?,?)"
    val tableColumns = Array("name", "address", "age")
    val types = Array("string", "string", "int")
    stream2.print()
    stream2.addSink(new MyClickHouseSink3(sql, tableColumns, types))
    env.execute("clickhouse sink test")
  }

  // When the trigger fires (by count) or the window ends, the buffered elements are collected here and packed into a list for the sink.
  class MyPWFunction extends ProcessAllWindowFunction[Row, util.List[Row], TimeWindow] {
    override def process(context: Context, elements: Iterable[Row], out: Collector[util.List[Row]]): Unit = {
      val list = new util.ArrayList[Row]
      elements.foreach(x => list.add(x))
      out.collect(list)
    }
  }

  class MyCountTrigger(maxCount: Int) extends Trigger[Row, TimeWindow] {
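    // per-window counter kept in partitioned trigger state, so each window pane counts independently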
    private lazy val count: ValueStateDescriptor[Int] = new ValueStateDescriptor[Int]("counter", classOf[Int])

    override def onElement(element: Row, timestamp: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = {
      val cntState = ctx.getPartitionedState(count)
      val cnt = cntState.value() + 1
      cntState.update(cnt)
      if (cnt >= maxCount) {
        // batch size reached: emit and drop the window contents, then start counting again
        cntState.clear()
        TriggerResult.FIRE_AND_PURGE
      } else {
        TriggerResult.CONTINUE
      }
    }

    // called when the window's end-of-window processing-time timer fires: flush whatever is still buffered
    override def onProcessingTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = TriggerResult.FIRE_AND_PURGE

    override def onEventTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = TriggerResult.CONTINUE

    override def clear(window: TimeWindow, ctx: Trigger.TriggerContext): Unit = {
      // called when the window is disposed; drop the per-window counter
      ctx.getPartitionedState(count).clear()
    }

  }

}

The sink referenced above is a plain JDBC RichSinkFunction: it binds each Row field according to its declared column type and writes the whole list as a single batch.

package com.otis.clickhouse;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.List;

public class MyClickHouseSink3 extends RichSinkFunction<List<Row>> {
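    // Receives one List<Row> per trigger firing and writes it to ClickHouse as a single JDBC batch.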
    Connection connection = null;
    String sql;

    // field names and types of the incoming Row, in column order
    private String[] tableColumns;
    private String[] types;

    public MyClickHouseSink3(String sql, String[] tableColumns, String[] types) {
        this.sql = sql;
        this.tableColumns = tableColumns;
        this.types = types;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        connection = ClickHouseUtil.getConnection("10.1.30.10", 8123, "qinghua");
    }

    @Override
    public void close() throws Exception {
        super.close();
        if (connection != null) {
            connection.close();
        }
    }

    @Override
    public void invoke(List<Row> value, Context context) throws Exception {
        // one PreparedStatement per batch; try-with-resources ensures it is closed even on failure
        try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) {
            for (Row e : value) {
                for (int i = 0; i < tableColumns.length; i++) {
                    if (e.getField(i) != null) {
                        // bind each field by its declared type
                        switch (types[i]) {
                            case "string":
                                preparedStatement.setString(i + 1, (String) e.getField(i));
                                break;
                            case "int":
                                preparedStatement.setInt(i + 1, (int) e.getField(i));
                                break;
                            default:
                                break;
                        }
                    } else {
                        preparedStatement.setObject(i + 1, null);
                    }
                }
                preparedStatement.addBatch();
            }
            long startTime = System.currentTimeMillis();
            int[] ints = preparedStatement.executeBatch();
            connection.commit();
            long endTime = System.currentTimeMillis();
            System.out.println("batch insert finished in " + (endTime - startTime) + " ms, rows inserted = " + ints.length);
        }
    }

}
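
The post does not show ClickHouseUtil, which open() uses to obtain the JDBC connection. Below is a minimal sketch of what it could look like, assuming the ClickHouse JDBC driver (e.g. ru.yandex.clickhouse:clickhouse-jdbc) is on the classpath; the class and the user2 DDL in the comment are reconstructions, not the author's original code.

package com.otis.clickhouse;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

/*
 * Hypothetical helper, reconstructed from the call in MyClickHouseSink3.open().
 *
 * The target table referenced by the INSERT statement is assumed to look
 * roughly like this (only the three columns are implied by the code; the
 * engine and ordering key are guesses):
 *
 *   CREATE TABLE user2 (name String, address String, age Int32)
 *   ENGINE = MergeTree ORDER BY name;
 */
public class ClickHouseUtil {
    public static Connection getConnection(String host, int port, String database) throws SQLException {
        // standard JDBC URL for ClickHouse's HTTP interface (default port 8123)
        String url = "jdbc:clickhouse://" + host + ":" + port + "/" + database;
        return DriverManager.getConnection(url);
    }
}

ClickHouse has no real transactions, and the classic JDBC driver treats commit() as essentially a no-op, so the connection.commit() call in invoke() is harmless. More importantly, ClickHouse strongly favors large, infrequent inserts, which is exactly why the job buffers rows with the window-plus-count trigger instead of writing row by row.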