Exercise: use the utils to read from and write to Kafka, then query with Flink SQL
bean
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:16
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Base_province {
    private Long id;
    private String name;
    private String region_id;
    private String area_code;
    private String iso_code;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Gmv {
    private Integer user_id;
    private String username;
    private String consignee;
    private Double final_total_amount;
    private Integer province_id;
    private String name;
    private Integer sku_id;
    private String sku_name;
    private Integer sku_num;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/13 16:01
 * Created by dell on 2022/3/13
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Order_detail {
    private Long id;
    private Long order_id;
    private Long sku_id;
    private String sku_name;
    private String img_url;
    private Double order_price;
    private String sku_num;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:33
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Order_info {
    private Long id;
    private String consignee;
    private String consignee_tel;
    private Double final_total_amount;
    private String order_status;
    private Long user_id;
    private String delivery_address;
    private String order_comment;
    private String out_trade_no;
    private String trade_body;
    private String create_time;
    private String operate_time;
    private String expire_time;
    private String tracking_no;
    private Long parent_order_id;
    private String img_url;
    private Integer province_id;
    private Double benefit_reduce_amount;
    private Double original_total_amount;
    private Double feight_fee;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:23
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Sku_info {
    private Long id;
    private Long spu_id;
    private Double price;
    private String sku_name;
    private String sku_desc;
    private Double weight;
    private Long tm_id;
    private Long category3_id;
    private String sku_default_img;
    private String create_time;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/11 11:51
 * Created by dell on 2022/3/11
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Student {
    private Integer sid;
    private String sname;
    private String sex;
}
```
```java
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/11 17:50
 * Created by dell on 2022/3/11
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class User_info {
    private Long id;
    private String login_name;
    private String nick_name;
    private String passwd;
    private String name;
    private String phone_num;
    private String email;
    private String head_img;
    private String user_level;
    private String birthday;
    private String gender;
    private String create_time;
}
```
utils
```java
package util;

import org.apache.hadoop.conf.Configuration;

import java.util.HashMap;
import java.util.Properties;

public class Propss {
    public static Properties producer_Props = new Properties();
    public static Properties consumer_Props = new Properties();
    static HashMap<String, Object> kafka_Consumer = new HashMap<>();

    public static Configuration setConf(Configuration conf) {
        conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        return conf;
    }

    static {
        kafka_Consumer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Consumer.put("group.id", "com/test");
        // consume from the beginning of the topic
        kafka_Consumer.put("auto.offset.reset", "earliest");
        kafka_Consumer.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        producer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        producer_Props.setProperty("acks", "all");
        producer_Props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer_Props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        consumer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        consumer_Props.setProperty("group.id", "com/test");
        consumer_Props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("auto.offset.reset", "earliest");
    }
}
```
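As a quick usage sketch (not part of the original code; the topic name `test_topic` and the sample record are made up for illustration), `producer_Props` feeds a plain `KafkaProducer` and `consumer_Props` feeds the Flink Kafka consumer used in the test class later:

```java
package test;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import util.Propss;

// Hypothetical usage example for Propss: write one record, then read the topic back with Flink.
public class Propss_Usage {
    public static void main(String[] args) throws Exception {
        // producer side: send a single CSV line
        KafkaProducer<String, String> producer = new KafkaProducer<>(Propss.producer_Props);
        producer.send(new ProducerRecord<>("test_topic", "1,zhangsan,male"));
        producer.close();

        // consumer side: Flink source built from consumer_Props
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(new FlinkKafkaConsumer<>("test_topic", new SimpleStringSchema(), Propss.consumer_Props))
           .print();
        env.execute("propss-usage");
    }
}
```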
```java
package util;

import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.lang.reflect.Field;
import java.sql.*;

public class MySQL_Utils {
    // Option 1: raw MySQL JDBC connection (deprecated in favor of option 2, but still used by execute/sendKafka)
    private static Connection conn;
    private static PreparedStatement pre;

    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
            conn.setAutoCommit(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Option 2: build a JDBCInputFormat (flink.api.java.io.jdbc) for env.createInput
    public static JDBCInputFormat getFormat(String database, String sql, RowTypeInfo rowTypeInfo) {
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
        String user = "root";
        String passwd = "root";
        return JDBCInputFormat.buildJDBCInputFormat()
                .setDrivername(driver)
                .setDBUrl(url)
                .setUsername(user)
                .setPassword(passwd)
                .setQuery(sql)
                .setRowTypeInfo(rowTypeInfo)
                .finish();
    }

    // Execute a statement, e.g. CREATE TABLE ... AS SELECT to build the wide table
    public static void execute(String database, String sql) {
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
        String user = "root";
        String passwd = "root";
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, passwd);
            conn.setAutoCommit(true);
            pre = conn.prepareStatement(sql);
            pre.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Query a table and send every row to a Kafka topic, one CSV line per row
    public static void sendKafka(String sql, String topic, Class T) throws Exception {
        // the bean's field names double as the result-set column names
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        ResultSet rs = ps.executeQuery();
        // one producer for the whole result set, not one per row
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(Propss.producer_Props);
        while (rs.next()) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < fieldName.length; i++) {
                line.append(",").append(rs.getString(fieldName[i]));
            }
            String record = line.substring(1);
            System.out.println(record);
            kafkaProducer.send(new ProducerRecord<>(topic, record));
        }
        kafkaProducer.close();
    }
}
```
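For reference, `getFormat` must be paired with a `RowTypeInfo` whose field types match the queried columns. A minimal sketch for `base_province` (column types assumed from the bean above, class name `MySQL_Utils_Usage` is hypothetical) could look like this:

```java
package test;

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import util.MySQL_Utils;

// Hypothetical usage example: read base_province from MySQL as a bounded Row stream.
public class MySQL_Utils_Usage {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        RowTypeInfo rowTypeInfo = new RowTypeInfo(
                BasicTypeInfo.LONG_TYPE_INFO,    // id
                BasicTypeInfo.STRING_TYPE_INFO,  // name
                BasicTypeInfo.STRING_TYPE_INFO,  // region_id
                BasicTypeInfo.STRING_TYPE_INFO,  // area_code
                BasicTypeInfo.STRING_TYPE_INFO   // iso_code
        );
        env.createInput(MySQL_Utils.getFormat("test3", "select * from base_province", rowTypeInfo))
           .print();
        env.execute("mysql-utils-usage");
    }
}
```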
```java
package util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class HBase_Util {
    public static Connection getConf() {
        // create the HBase configuration object
        Configuration conf = HBaseConfiguration.create();
        // set the HBase connection properties
        conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        Connection connection = null;
        // create the connection object through the connection factory
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return connection;
    }
}
```
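The HBase sink in the sink section below writes into an `info` column family and assumes the target tables already exist. A minimal sketch for pre-creating them with this util is shown next; it assumes the HBase 2.x client API (with a 1.x client, `HTableDescriptor`/`HColumnDescriptor` would be used instead), and the helper class name `HBase_Table_Init` is not part of the original code:

```java
package util;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

// Hypothetical helper: pre-create the dimension tables with an "info" column family
// so that HBase_Sink has somewhere to write.
public class HBase_Table_Init {
    public static void main(String[] args) throws Exception {
        Connection connection = HBase_Util.getConf();
        Admin admin = connection.getAdmin();
        String[] tables = {"user_info", "base_province", "sku_info"};   // table names used later in the exercise
        for (String name : tables) {
            TableName tableName = TableName.valueOf(name);
            if (!admin.tableExists(tableName)) {
                admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("info"))
                        .build());
            }
        }
        admin.close();
        connection.close();
    }
}
```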
```java
package util;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class Send_Kafka_Urils {
    private static Connection conn;

    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
            conn.setAutoCommit(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Query MySQL and forward every row to the given Kafka topic as a CSV line
    public static void selectAndSendKafka(String sql, String topic, Class T) throws Exception {
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        ResultSet rs = ps.executeQuery();
        // reuse one producer for the whole result set instead of one per row
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(Propss.producer_Props);
        while (rs.next()) {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < fieldName.length; i++) {
                line.append(",").append(rs.getString(fieldName[i]));
            }
            String record = line.substring(1);
            System.out.println(record);
            kafkaProducer.send(new ProducerRecord<>(topic, record));
        }
        kafkaProducer.close();
    }

    // Send a single line to the given topic
    public static void sendKafka(String line, String topic) throws Exception {
        if (line != null) {
            KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(Propss.producer_Props);
            kafkaProducer.send(new ProducerRecord<>(topic, line));
            kafkaProducer.close();
        }
    }
}
```
sink
```java
package sink;

import util.HBase_Util;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.lang.reflect.Field;

public class HBase_Sink extends RichSinkFunction<String> {
    private Connection connection;
    private Class T;
    private String tableName;
    private String[] fieldsName;

    public static String[] getFiledName(Class T) {
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }
        return fieldName;
    }

    public HBase_Sink(Class T, String tableName) {
        this.T = T;
        this.tableName = tableName;
        this.fieldsName = getFiledName(T);
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        connection = HBase_Util.getConf();
    }

    @Override
    public void invoke(String value, Context context) throws Exception {
        String[] s1 = value.split(",");
        Table table = connection.getTable(TableName.valueOf(tableName));
        // use the first field (the id) as the rowkey
        // alternatively: String rowkey = UUID.randomUUID().toString().replaceAll("-", "");
        Put put = new Put(Bytes.toBytes(s1[0]));
        for (int i = 0; i < fieldsName.length; i++) {
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes(fieldsName[i]), Bytes.toBytes(s1[i]));
        }
        table.put(put);
        table.close();
    }

    @Override
    public void close() throws Exception {
        connection.close();
    }
}
```
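To spot-check that the sink actually wrote a dimension row, a small read-back with the standard HBase client could look like the sketch below; the rowkey `"1"` and the `login_name` column are assumptions for illustration, and the class is not part of the original code:

```java
package test;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import util.HBase_Util;

// Hypothetical check: read one row back from the user_info table in HBase.
public class HBase_Read_Check {
    public static void main(String[] args) throws Exception {
        Connection connection = HBase_Util.getConf();
        Table table = connection.getTable(TableName.valueOf("user_info"));
        Result result = table.get(new Get(Bytes.toBytes("1")));   // rowkey "1" is an assumption
        byte[] name = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("login_name"));
        System.out.println(name == null ? "row not found" : Bytes.toString(name));
        table.close();
        connection.close();
    }
}
```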
test
```java
package test;

import bean.*;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import sink.HBase_Sink;
import util.MySQL_Utils;
import util.Propss;
import util.Send_Kafka_Urils;

public class Flink_Test {
    public static void main(String[] args) throws Exception {
        // 3) create the Flink environment with parallelism 1 and exactly-once checkpointing
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

        // 4) read the Kafka source and parse it into beans
        DataStreamSource<String> ods_db_temp = env.addSource(
                new FlinkKafkaConsumer<String>("ods_db_temp", new SimpleStringSchema(), Propss.consumer_Props));
        SingleOutputStreamOperator<Student> map = ods_db_temp.map(new MapFunction<String, Student>() {
            @Override
            public Student map(String s) throws Exception {
                String[] split = s.split(",");
                return new Student(Integer.valueOf(split[0]), split[1], split[2]);
            }
        });
        map.print();

        // 5) read the user dimension from MySQL and send it to the corresponding Kafka topic
        RowTypeInfo rowTypeInfo = new RowTypeInfo(
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.DATE_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.DATE_TYPE_INFO
        );
        JDBCInputFormat user_format = MySQL_Utils.getFormat("test3", "select * from user_info", rowTypeInfo);
        DataStreamSource<Row> input = env.createInput(user_format);
        input.print();
        SingleOutputStreamOperator<String> line_ = input.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                String[] fields = rowTypeInfo.getFieldNames();
                String line = "";
                for (int i = 0; i < fields.length; i++) {
                    line += ",";
                    line += row.getField(i);
                }
                return line.substring(1);
            }
        });
        line_.addSink(new FlinkKafkaProducer("hadoop106:9092", "user_", new SimpleStringSchema()));

        // 6) dynamic routing: send the user, region and sku dimension tables to their Kafka topics
        Send_Kafka_Urils.selectAndSendKafka("select * from user_info", "user_info", User_info.class);
        Send_Kafka_Urils.selectAndSendKafka("select * from base_province", "base_province", Base_province.class);
        Send_Kafka_Urils.selectAndSendKafka("select * from sku_info", "sku_info", Sku_info.class);

        // 7) save the dimension data to HBase
        FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>("user_info", new SimpleStringSchema(), Propss.consumer_Props);
        sss.setStartFromEarliest();
        DataStreamSource<String> user = env.addSource(sss);
        user.print();
        user.addSink(new HBase_Sink(User_info.class, "user_info"));

        FlinkKafkaConsumer<String> sst = new FlinkKafkaConsumer<>("base_province", new SimpleStringSchema(), Propss.consumer_Props);
        sst.setStartFromEarliest();
        DataStreamSource<String> province = env.addSource(sst);
        province.print();
        province.addSink(new HBase_Sink(Base_province.class, "base_province"));

        FlinkKafkaConsumer<String> ssw = new FlinkKafkaConsumer<>("sku_info", new SimpleStringSchema(), Propss.consumer_Props);
        ssw.setStartFromEarliest();
        DataStreamSource<String> sku = env.addSource(ssw);
        sku.print();
        sku.addSink(new HBase_Sink(Sku_info.class, "sku_info"));

        // 8) dynamic routing: write the order fact table to the Kafka topic ods_db
        Send_Kafka_Urils.selectAndSendKafka("select * from order_info", "ods_db", Order_info.class);

        // 9) read the ods_db data from Kafka
        DataStreamSource<String> ods_db = env.addSource(
                new FlinkKafkaConsumer<String>("ods_db", new SimpleStringSchema(), Propss.consumer_Props));
        ods_db.print();

        // 10) widen the order table with the user, region and sku dimensions and publish the result to the dwd layer in Kafka
        String sql = "create table gmv as (select user_id,user_info.`name` username,consignee,final_total_amount,province_id,base_province.`name`,sku_id,sku_info.sku_name,sku_num from user_info join order_info on user_info.id = order_info.user_id " +
                " join order_detail on order_info.id = order_detail.order_id " +
                " join sku_info on order_detail.sku_id = sku_info.spu_id " +
                " join base_province on base_province.id=province_id)";
        MySQL_Utils.execute("test3", sql);
        Send_Kafka_Urils.selectAndSendKafka("select * from gmv", "gmv", Gmv.class);

        // 11) read the dwd-layer data from Kafka and compute the GMV with Flink SQL
        FlinkKafkaConsumer<String> gsource = new FlinkKafkaConsumer<>("gmv", new SimpleStringSchema(), Propss.consumer_Props);
        gsource.setStartFromEarliest();
        DataStreamSource<String> gsources = env.addSource(gsource);
        SingleOutputStreamOperator<Gmv> gmv = gsources.map(new MapFunction<String, Gmv>() {
            @Override
            public Gmv map(String s) throws Exception {
                String[] split = s.split(",");
                return new Gmv(Integer.valueOf(split[0]), split[1], split[2], Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5],
                        Integer.valueOf(split[6]), split[7], Integer.valueOf(split[8]));
            }
        });

        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, build);
        tableEnv.createTemporaryView("gmv", gmv);
        Table table = tableEnv.sqlQuery("select sum(final_total_amount) from gmv");
        TableResult execute = table.execute();
        execute.print();

        // 12) save the result to ClickHouse
        // 13) verify the result in ClickHouse

        env.execute();
    }
}
```
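Steps 12) and 13) are left unimplemented above. One way to finish them is a plain JDBC sink into ClickHouse; the sketch below is only an assumption for illustration (the legacy ClickHouse JDBC driver, the host `hadoop106:8123`, and the target table `test3.gmv_result` with its columns are all made up, not part of the original exercise):

```java
package sink;

import bean.Gmv;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

// Hypothetical sketch for step 12): write each Gmv record into ClickHouse over JDBC.
public class ClickHouse_Sink extends RichSinkFunction<Gmv> {
    private Connection conn;
    private PreparedStatement ps;

    @Override
    public void open(Configuration parameters) throws Exception {
        // assumes the legacy ClickHouse JDBC driver is on the classpath
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        conn = DriverManager.getConnection("jdbc:clickhouse://hadoop106:8123/test3", "default", "");
        // assumed target table, e.g. created beforehand with a MergeTree engine
        ps = conn.prepareStatement(
                "insert into gmv_result(user_id, username, province, sku_name, final_total_amount) values (?,?,?,?,?)");
    }

    @Override
    public void invoke(Gmv value, Context context) throws Exception {
        ps.setInt(1, value.getUser_id());
        ps.setString(2, value.getUsername());
        ps.setString(3, value.getName());
        ps.setString(4, value.getSku_name());
        ps.setDouble(5, value.getFinal_total_amount());
        ps.execute();
    }

    @Override
    public void close() throws Exception {
        if (ps != null) ps.close();
        if (conn != null) conn.close();
    }
}
```

Wiring it in would then be a `gmv.addSink(new ClickHouse_Sink());` call before `env.execute()`, and step 13) amounts to running `select sum(final_total_amount) from gmv_result` in the ClickHouse client and comparing it with the Flink SQL result.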