Exercise: writing data to HDFS and MySQL with Flink sinks
> bean.Sensor
```java
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Sensor {
    private String sensor_id;
    private Long sensor_timeStamp;
    private Double sensor_Temp;
}
```
> bean.Shop
```java
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Shop {
    private String uid;
    private String type;
    private String name;
    private Integer num;
    private Double price;
    private Long time;
    private Double total;
}
```
> utils.Propss
```java
package utils;

import org.apache.hadoop.conf.Configuration;
import java.util.HashMap;
import java.util.Properties;

public class Propss {
    public static Properties producer_Props = new Properties();
    public static Properties consumer_Props = new Properties();
    public static HashMap<String, Object> kafka_Consumer = new HashMap<>();
    public static HashMap<String, Object> kafka_Producer = new HashMap<>();

    public static Configuration getZkConf() {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        return conf;
    }

    static {
        kafka_Producer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        // acks: 0 = success as soon as the message is sent; 1 = success once the leader has it;
        // all = success only after every replica has written the message
        kafka_Producer.put("acks", "all");
        // number of retries
        kafka_Producer.put("retries", Integer.MAX_VALUE);
        // batch size in bytes
        kafka_Producer.put("batch.size", 16384);
        // how long to wait for a batch to fill before sending it
        kafka_Producer.put("linger.ms", 1);
        // upper bound on the producer's memory buffer, default 32 MB
        kafka_Producer.put("buffer.memory", 33554432);
        kafka_Producer.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        kafka_Producer.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        kafka_Consumer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Consumer.put("group.id", "com-test");
        // consume from the beginning of the topic
        kafka_Consumer.put("auto.offset.reset", "earliest");
        kafka_Consumer.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        producer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        producer_Props.setProperty("acks", "all");
        producer_Props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer_Props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        consumer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        consumer_Props.setProperty("group.id", "com-test");
        consumer_Props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("auto.offset.reset", "earliest");
    }
}
```
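The kafka_Producer map is defined above but not used in this exercise. A minimal sketch of how it could feed test data into Kafka, assuming the topic `day1` and the comma-separated field order (`uid,type,name,num,price,time`) expected by the MySQL job further down; the class name and sample values are made up for illustration:

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import utils.Propss;

// Hypothetical test producer: pushes a few Shop-style CSV records
// (uid,type,name,num,price,time) into the day1 topic using Propss.kafka_Producer.
public class ShopTestProducer {
    public static void main(String[] args) {
        KafkaProducer<String, String> producer = new KafkaProducer<>(Propss.kafka_Producer);
        long now = System.currentTimeMillis();
        // sample records, values are made up for testing only
        producer.send(new ProducerRecord<>("day1", "u1,food,apple,3,2.5," + now));
        producer.send(new ProducerRecord<>("day1", "u2,food,bread,2,4.0," + now));
        producer.send(new ProducerRecord<>("day1", "u3,toy,lego,1,99.9," + now));
        producer.close();
    }
}
```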
> util.MySqlUtil
```java
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

// Custom MySQL sink: extends RichSinkFunction so the JDBC connection
// can be opened once in open() and released in close()
public class MySqlUtil extends RichSinkFunction<Shop> {
    private Connection conn;
    private PreparedStatement pre;

    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
        conn.setAutoCommit(true);
        // prepare the statement once instead of on every record
        pre = conn.prepareStatement("insert into shop(type,total) values (?,?)");
    }

    @Override
    public void invoke(Shop value, Context context) throws Exception {
        pre.setString(1, value.getType());
        pre.setDouble(2, value.getTotal());
        pre.execute();
    }

    @Override
    public void close() throws Exception {
        pre.close();
        conn.close();
    }
}
```
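MySqlUtil assumes the `shop` table already exists in the `test3` database. A minimal one-off sketch to create it; the two-column schema is an assumption derived from the INSERT statement above, and the class name is hypothetical:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// One-off helper to create the target table used by MySqlUtil.
// Schema is an assumption: only the two columns the sink writes.
public class CreateShopTable {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://hadoop106:3306/test3", "root", "root");
             Statement st = conn.createStatement()) {
            st.execute("CREATE TABLE IF NOT EXISTS shop (type VARCHAR(64), total DOUBLE)");
        }
    }
}
```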
> test
Sink to HDFS
```java
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

import java.util.concurrent.TimeUnit;

// Sink to HDFS
public class Flink_Sink_HDFS {
    public static void main(String[] args) throws Exception {
        // Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // socket source on hadoop106
        DataStreamSource<String> hadoop106 = env.socketTextStream("hadoop106", 9999);
        // HDFS output path for the sink
        String path = "hdfs://hadoop106:8020/test/out";
        Path outputPath = new Path(path);

        // checkpoint every 10 seconds
        env.enableCheckpointing(10000);

        // sink: row format, rolling by interval, inactivity and part-file size
        final StreamingFileSink<String> sink = StreamingFileSink
                .forRowFormat(outputPath, new SimpleStringEncoder<String>("UTF-8"))
                .withRollingPolicy(
                        DefaultRollingPolicy.builder()
                                .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
                                .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
                                .withMaxPartSize(1024 * 1024 * 1024)
                                .build())
                .build();
        // write the stream to HDFS
        hadoop106.addSink(sink);
        env.execute();
    }
}
```
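To try this out, start a socket server on hadoop106 first (for example `nc -lk 9999`), launch the job, then type lines into the terminal. Note that StreamingFileSink depends on checkpointing: pending part files are only finalized when a checkpoint completes, which is why `enableCheckpointing(10000)` is set above.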
> test
Sink to MySQL
```java
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Arrays;
import java.util.List;

public class Flink_Sink_Mysql {
    public static void main(String[] args) throws Exception {
        // Flink environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // time semantics: event time
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // Kafka topic: day1
        List<String> topic = Arrays.asList("day1");
        // build the Kafka consumer
        FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), Propss.consumer_Props);
        // consume the topic from the earliest offset
        sss.setStartFromEarliest();
        // read from Kafka
        DataStreamSource<String> source = env.addSource(sss);

        // wrap each CSV record (uid,type,name,num,price,time) into a Shop object
        SingleOutputStreamOperator<Shop> flat = source.map(new MapFunction<String, Shop>() {
            @Override
            public Shop map(String s) throws Exception {
                String[] ss = s.split(",");
                return new Shop(ss[0], ss[1], ss[2], Integer.valueOf(ss[3]), Double.valueOf(ss[4]),
                        Long.valueOf(ss[5]), Integer.valueOf(ss[3]) * Double.valueOf(ss[4]));
            }
        });
        // watermark with 3 seconds of allowed out-of-orderness
        SingleOutputStreamOperator<Shop> ope = flat.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Shop>(Time.seconds(3)) {
                    @Override
                    public long extractTimestamp(Shop shop) {
                        return shop.getTime();
                    }
                });

        // sum total per type over 60-second event-time windows
        SingleOutputStreamOperator<Shop> sum = ope.keyBy("type").timeWindow(Time.seconds(60)).sum("total");
        sum.print();
        // write results to MySQL
        sum.addSink(new MySqlUtil());

        env.execute();
    }
}
```
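Suggested run order: create the `shop` table (for example with the table-creation sketch above), start Flink_Sink_Mysql, then produce records into the `day1` topic (for example with the hypothetical ShopTestProducer sketch). Each 60-second event-time window should then emit one aggregated row per `type` into MySQL.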