package com.otis.clickhouse
import java.util
import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.triggers.{Trigger, TriggerResult}
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala._
import org.apache.flink.types.Row
import org.apache.flink.util.Collector
object FlinkJob3 {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val settings = EnvironmentSettings.newInstance.useBlinkPlanner.inStreamingMode.build
    val tableEnv = StreamTableEnvironment.create(env, settings)
    val datagen =
      """
        |create table datagen (
        |  name string,
        |  address string,
        |  age int
        |) with (
        |  'connector' = 'datagen',
        |  'rows-per-second' = '5'
        |)
        |""".stripMargin
    tableEnv.executeSql(datagen)
    val table = tableEnv.sqlQuery("select name, address, abs(age) as age from datagen")
    val stream = tableEnv.toAppendStream[Row](table)
    // Flink's built-in timeWindow and countWindow don't satisfy the requirement on
    // their own: we want to aggregate over a fixed time window, but also fire as
    // soon as the element count reaches batchSize, so a custom trigger is needed.
    // TODO: after an early firing at maxCount, elements arriving before the window
    //  closes (0 < count < maxCount) are emitted as a smaller batch when the
    //  window's end time is reached.
    val stream2: DataStream[util.List[Row]] = stream.timeWindowAll(Time.seconds(5))
      .trigger(new MyCountTrigger(20))
      .process(new MyPWFunction)
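    // The target ClickHouse table is not shown in the original listing; a plausible
    // DDL (an assumption -- adjust engine and ordering to your setup) would be:
    //   CREATE TABLE user2 (name String, address String, age Int32)
    //   ENGINE = MergeTree() ORDER BY name;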
    val sql = "INSERT INTO user2 (name, address, age) VALUES (?,?,?)"
    // Column names and types are positional: they map one-to-one to the ? placeholders.
    val tableColumns = Array("name", "address", "age")
    val types = Array("string", "string", "int")
    stream2.print()
    stream2.addSink(new MyClickHouseSink3(sql, tableColumns, types))
    env.execute("clickhouse sink test")
  }
  // When the trigger fires (by count or by time), the window's elements are
  // collected here and emitted downstream as one batch.
  class MyPWFunction extends ProcessAllWindowFunction[Row, util.List[Row], TimeWindow] {
    override def process(context: Context, elements: Iterable[Row], out: Collector[util.List[Row]]): Unit = {
      val list = new util.ArrayList[Row]
      elements.foreach(x => list.add(x))
      out.collect(list)
    }
  }
  class MyCountTrigger(maxCount: Int) extends Trigger[Row, TimeWindow] {
    // Descriptor for the per-window element counter kept in partitioned state.
    private lazy val countDescriptor: ValueStateDescriptor[Int] = new ValueStateDescriptor[Int]("counter", classOf[Int])

    override def onElement(element: Row, timestamp: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = {
      val countState = ctx.getPartitionedState(countDescriptor)
      val cnt = countState.value()
      countState.update(cnt + 1)
      if (cnt + 1 >= maxCount) {
        // Batch size reached: emit and purge the window contents early.
        countState.clear()
        TriggerResult.FIRE_AND_PURGE
      } else {
        TriggerResult.CONTINUE
      }
    }

    // Invoked by the window's processing-time timers (including the end-of-window
    // cleanup timer): flush whatever has accumulated since the last firing.
    override def onProcessingTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = TriggerResult.FIRE_AND_PURGE

    override def onEventTime(time: Long, window: TimeWindow, ctx: Trigger.TriggerContext): TriggerResult = TriggerResult.CONTINUE

    // clear() returns Unit; it only needs to release the counter state.
    override def clear(window: TimeWindow, ctx: Trigger.TriggerContext): Unit =
      ctx.getPartitionedState(countDescriptor).clear()
  }
}
package com.otis.clickhouse;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.List;
public class MyClickHouseSink3 extends RichSinkFunction<List<Row>> {
    private Connection connection = null;
    private String sql;
    // Column names and types of the Row fields, in positional order.
    private String[] tableColumns;
    private String[] types;

    public MyClickHouseSink3(String sql, String[] tableColumns, String[] types) {
        this.sql = sql;
        this.tableColumns = tableColumns;
        this.types = types;
    }
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        connection = ClickHouseUtil.getConnection("10.1.30.10", 8123, "qinghua");
    }

    @Override
    public void close() throws Exception {
        super.close();
        connection.close();
    }
    @Override
    public void invoke(List<Row> value, Context context) throws Exception {
        // Batch all rows of this window firing into a single prepared statement;
        // try-with-resources ensures the statement is closed even on failure.
        try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) {
            for (Row e : value) {
                int length = tableColumns.length;
                for (int i = 0; i < length; i++) {
                    String type = types[i];
                    if (e.getField(i) != null) {
                        switch (type) {
                            case "string":
                                preparedStatement.setString(i + 1, (String) e.getField(i));
                                break;
                            case "int":
                                preparedStatement.setInt(i + 1, (int) e.getField(i));
                                break;
                            default:
                                break;
                        }
                    } else {
                        preparedStatement.setObject(i + 1, null);
                    }
                }
                preparedStatement.addBatch();
            }
            long startTime = System.currentTimeMillis();
            int[] ints = preparedStatement.executeBatch();
            connection.commit();
            long endTime = System.currentTimeMillis();
            System.out.println("Batch insert finished in " + (endTime - startTime) + " ms -- rows inserted = " + ints.length);
        }
    }
}
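
// ClickHouseUtil is referenced in open() above but was not included in the
// original listing. Below is a minimal sketch of what it might look like,
// assuming a ClickHouse JDBC driver is on the classpath and the server speaks
// the HTTP protocol on port 8123; credentials and pooling are omitted. The
// parameters match the call ClickHouseUtil.getConnection("10.1.30.10", 8123, "qinghua").
package com.otis.clickhouse;

import java.sql.Connection;
import java.sql.DriverManager;

public class ClickHouseUtil {
    // Build a plain JDBC connection to the given ClickHouse host/port/database.
    public static Connection getConnection(String host, int port, String database) throws Exception {
        String url = "jdbc:clickhouse://" + host + ":" + port + "/" + database;
        return DriverManager.getConnection(url);
    }
}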