
Exercise: use the utility classes to read from MySQL and write to Kafka, then query with Flink SQL

bean

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:16
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Base_province {
    private Long id;
    private String name;
    private String region_id;
    private String area_code;
    private String iso_code;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Gmv {
    private Integer user_id;
    private String username;
    private String consignee;
    private Double final_total_amount;
    private Integer province_id;
    private String name;
    private Integer sku_id;
    private String sku_name;
    private Integer sku_num;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/13 16:01
 * Created by dell on 2022/3/13
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Order_detail {
    private Long id;
    private Long order_id;
    private Long sku_id;
    private String sku_name;
    private String img_url;
    private Double order_price;
    private String sku_num;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:33
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Order_info {
    private Long id;
    private String consignee;
    private String consignee_tel;
    private Double final_total_amount;
    private String order_status;
    private Long user_id;
    private String delivery_address;
    private String order_comment;
    private String out_trade_no;
    private String trade_body;
    private String create_time;
    private String operate_time;
    private String expire_time;
    private String tracking_no;
    private Long parent_order_id;
    private String img_url;
    private Integer province_id;
    private Double benefit_reduce_amount;
    private Double original_total_amount;
    private Double feight_fee;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/12 15:23
 * Created by dell on 2022/3/12
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Sku_info {
    private Long id;
    private Long spu_id;
    private Double price;
    private String sku_name;
    private String sku_desc;
    private Double weight;
    private Long tm_id;
    private Long category3_id;
    private String sku_default_img;
    private String create_time;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/11 11:51
 * Created by dell on 2022/3/11
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Student {
    private Integer sid;
    private String sname;
    private String sex;
}

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Package: bean
 * Description: TODO
 * Author ly
 * Date 2022/3/11 17:50
 * Created by dell on 2022/3/11
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class User_info {
    private Long id;
    private String login_name;
    private String nick_name;
    private String passwd;
    private String name;
    private String phone_num;
    private String email;
    private String head_img;
    private String user_level;
    private String birthday;
    private String gender;
    private String create_time;
}

utils

package util;

import org.apache.hadoop.conf.Configuration;
import java.util.HashMap;
import java.util.Properties;

// Shared Kafka producer/consumer properties plus an HBase configuration helper
public class Propss {
    public static Properties producer_Props = new Properties();
    public static Properties consumer_Props = new Properties();
    static HashMap<String, Object> kafka_Consumer = new HashMap<>();

    public static Configuration setConf(Configuration conf){
        conf.set("hbase.zookeeper.quorum","hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        return conf;
    }

    static {
        kafka_Consumer.put("bootstrap.servers","hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Consumer.put("group.id", "com/test");
        // read from the beginning of the topic
        kafka_Consumer.put("auto.offset.reset","earliest");
        kafka_Consumer.put("key.deserializer","org.apache.kafka.common.serialization.StringDeserializer");
        kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        producer_Props.setProperty("bootstrap.servers","hadoop106:9092,hadoop107:9092,hadoop108:9092");
        producer_Props.setProperty("acks","all");
        producer_Props.put("key.serializer","org.apache.kafka.common.serialization.StringSerializer");
        producer_Props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        consumer_Props.setProperty("bootstrap.servers","hadoop106:9092,hadoop107:9092,hadoop108:9092");
        consumer_Props.setProperty("group.id", "com/test");
        consumer_Props.put("key.deserializer","org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.put("auto.offset.reset","earliest");
    }
}

package util;

import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.lang.reflect.Field;
import java.sql.*;

public class MySQL_Utils {
    // Option 1: plain MySQL JDBC connection (deprecated)
    private static Connection conn;
    private static PreparedStatement pre;
    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
            conn.setAutoCommit(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Option 2: build a JDBCInputFormat (flink.api.java.io.jdbc) so MySQL can be read as a Flink source
    public static JDBCInputFormat getFormat(String database, String sql, RowTypeInfo rowTypeInfo){
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
        String user = "root";
        String passwd = "root";
        JDBCInputFormat format = JDBCInputFormat.buildJDBCInputFormat()
                .setDrivername(driver)
                .setDBUrl(url)
                .setUsername(user)
                .setPassword(passwd)
                .setQuery(sql)
                .setRowTypeInfo(rowTypeInfo)
                .finish();
        return format;
    }

    // Execute a SQL statement, e.g. to build the wide table
    public static void execute(String database, String sql){
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
        String user = "root";
        String passwd = "root";
        try {
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, passwd);
            conn.setAutoCommit(true);
            pre = conn.prepareStatement(sql);
            pre.execute();
        } catch (Exception throwables) {
            throwables.printStackTrace();
        }
    }

    // Query a table and send each row to Kafka as a comma-separated line
    public static void sendKafka(String sql, String topic, Class T) throws Exception {
        // T is already a Class object, so read the field names from it directly
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        ResultSet rs = ps.executeQuery();
        // one producer for the whole result set instead of one per row
        KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
        while (rs.next()){
            String line = "";
            for (int i = 0; i < fieldName.length; i++) {
                line += ",";
                line += rs.getString(fieldName[i]);
            }
            line = line.substring(1);
            System.out.println(line);
            kafkaProducer.send(new ProducerRecord(topic, line));
        }
        kafkaProducer.close();
    }
}

package util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

import java.io.IOException;

public class HBase_Util {
    public static Connection getConf(){
        // create the HBase configuration object
        Configuration conf = HBaseConfiguration.create();
        // set the HBase connection properties
        conf.set("hbase.zookeeper.quorum","hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort","2181");
        Connection connection = null;
        // build the connection object from the configuration
        try {
            connection = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return connection;
    }
}

package util;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class Send_Kafka_Urils {
    private static Connection conn;
    static {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
            conn.setAutoCommit(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // Query a MySQL table and send each row to Kafka as a comma-separated line
    public static void selectAndSendKafka(String sql, String topic, Class T) throws Exception {
        // read the column names from the bean class via reflection
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        ResultSet rs = ps.executeQuery();
        // one producer for the whole result set instead of one per row
        KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
        while (rs.next()) {
            String line = "";
            for (int i = 0; i < fieldName.length; i++) {
                line += ",";
                line += rs.getString(fieldName[i]);
            }
            line = line.substring(1);
            System.out.println(line);
            kafkaProducer.send(new ProducerRecord(topic, line));
        }
        kafkaProducer.close();
    }

    // Send a single line to Kafka
    public static void sendKafka(String line, String topic) throws Exception {
        if (line != null) {
            KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
            kafkaProducer.send(new ProducerRecord(topic, line));
            kafkaProducer.close();
        }
    }
}

sink

package sink;

import util.HBase_Util;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.lang.reflect.Field;
import java.util.UUID;

public class HBase_Sink extends RichSinkFunction<String>  {
    private Connection connection;
    private Class T;
    private String tableName;
    private String[] fieldsName;

    public static String[] getFiledName(Class T) {
        Field[] fields = T.getDeclaredFields();
        String[] fieldName = new String[fields.length];
        for (int i = 0; i < fieldName.length; i++) {
            fieldName[i] = fields[i].getName();
        }
        return fieldName;
    }

    public HBase_Sink(Class T, String tableName){
        this.T = T;
        this.tableName = tableName;
        this.fieldsName = getFiledName(T);
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        connection = HBase_Util.getConf();
    }

    @Override
    public void invoke(String value, Context context) throws Exception {
        String[] s1 = value.split(",");
        Table table = connection.getTable(TableName.valueOf(tableName));
//        String rowkey = UUID.randomUUID().toString().replaceAll("-", "");
        // use the first field (the id) as the rowkey and write every field into the "info" column family
        Put put = new Put(Bytes.toBytes(s1[0]));
        for (int i = 0; i < fieldsName.length; i++) {
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes(fieldsName[i]), Bytes.toBytes(s1[i]));
        }
        table.put(put);
        table.close();
    }

    @Override
    public void close() throws Exception {
        connection.close();
    }
}
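
HBase_Sink writes every field into the column family info but never creates the target table, so the HBase tables used in the test below (user_info, base_province, sku_info) have to exist before the job runs. Below is a minimal sketch of pre-creating them; the helper class itself is not part of the original code, and it assumes the HBase 2.x client API (an older client would use HTableDescriptor instead).

package util;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

// Hypothetical helper (not part of the original post): pre-create the HBase tables
// that HBase_Sink writes to, each with the single column family "info".
public class HBase_Table_Init {
    public static void main(String[] args) throws Exception {
        Connection connection = HBase_Util.getConf();
        Admin admin = connection.getAdmin();
        for (String name : new String[]{"user_info", "base_province", "sku_info"}) {
            TableName tableName = TableName.valueOf(name);
            if (!admin.tableExists(tableName)) {
                admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("info"))
                        .build());
            }
        }
        admin.close();
        connection.close();
    }
}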

 

test

package test;

import bean.*;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import sink.HBase_Sink;
import util.MySQL_Utils;
import util.Propss;
import util.Send_Kafka_Urils;


public class Flink_Test {
    public static void main(String[] args) throws Exception {
        // 3) Create the Flink environment with parallelism 1 and exactly-once checkpointing
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);
        env.enableCheckpointing(5000L);   // enable checkpointing so the exactly-once mode below takes effect
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

        // 4) Read the Kafka source and parse it
        DataStreamSource<String> ods_db_temp = env.addSource(new FlinkKafkaConsumer<String>("ods_db_temp", new SimpleStringSchema(), Propss.consumer_Props));
        SingleOutputStreamOperator<Student> map = ods_db_temp.map(new MapFunction<String, Student>() {
            @Override
            public Student map(String s) throws Exception {
                String[] split = s.split(",");
                return new Student(Integer.valueOf(split[0]), split[1], split[2]);
            }
        });
        map.print();

        // 5) Build the dimension data and send it to the corresponding Kafka topic
        RowTypeInfo rowTypeInfo = new RowTypeInfo(
                BasicTypeInfo.LONG_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.DATE_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO,
                BasicTypeInfo.DATE_TYPE_INFO
        );
        JDBCInputFormat user_format = MySQL_Utils.getFormat("test3", "select * from user_info", rowTypeInfo);
        DataStreamSource<Row> input = env.createInput(user_format);
        input.print();
        SingleOutputStreamOperator<String> line_ = input.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                String[] fields = rowTypeInfo.getFieldNames();
                String line = "";
                for (int i = 0; i < fields.length; i++) {
                    line += ",";
                    line += row.getField(i);
                }
                return line.substring(1);
            }
        });
        line_.addSink(new FlinkKafkaProducer("hadoop106:9092", "user_", new SimpleStringSchema()));

        // 6) Dynamic routing: send the user, region and product dimension tables to their Kafka topics
        Send_Kafka_Urils.selectAndSendKafka("select * from user_info", "user_info", User_info.class);
        Send_Kafka_Urils.selectAndSendKafka("select * from base_province", "base_province", Base_province.class);
        Send_Kafka_Urils.selectAndSendKafka("select * from sku_info", "sku_info", Sku_info.class);

        // 7) Save the dimension data to HBase
        FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>("user_info", new SimpleStringSchema(), Propss.consumer_Props);
        sss.setStartFromEarliest();
        DataStreamSource<String> user = env.addSource(sss);
        user.print();
        user.addSink(new HBase_Sink(User_info.class, "user_info"));

        FlinkKafkaConsumer<String> sst = new FlinkKafkaConsumer<>("base_province", new SimpleStringSchema(), Propss.consumer_Props);
        sst.setStartFromEarliest();
        DataStreamSource<String> province = env.addSource(sst);
        province.print();
        province.addSink(new HBase_Sink(Base_province.class, "base_province"));

        FlinkKafkaConsumer<String> ssw = new FlinkKafkaConsumer<>("sku_info", new SimpleStringSchema(), Propss.consumer_Props);
        ssw.setStartFromEarliest();
        DataStreamSource<String> sku = env.addSource(ssw);
        sku.print();
        sku.addSink(new HBase_Sink(Sku_info.class, "sku_info"));

        // 8) Dynamic routing: write the order fact table to the Kafka topic ods_db
        Send_Kafka_Urils.selectAndSendKafka("select * from order_info", "ods_db", Order_info.class);

        // 9) Read the ods_db data from Kafka
        DataStreamSource<String> ods_db = env.addSource(new FlinkKafkaConsumer<String>("ods_db", new SimpleStringSchema(), Propss.consumer_Props));
        ods_db.print();

        // 10) Widen the order table by joining the user, region and product dimensions, then send the result on to the dwd-layer Kafka topic
        String sql = "create table gmv as (select user_id,user_info.`name` username,consignee,final_total_amount,province_id,base_province.`name`,sku_id,sku_info.sku_name,sku_num from user_info join order_info on user_info.id = order_info.user_id " +
                " join order_detail on order_info.id = order_detail.order_id " +
                " join sku_info on order_detail.sku_id = sku_info.id " +
                " join base_province on base_province.id = province_id)";
        MySQL_Utils.execute("test3", sql);
        Send_Kafka_Urils.selectAndSendKafka("select * from gmv", "gmv", Gmv.class);

        // 11) Read the dwd-layer data from Kafka and compute GMV
        FlinkKafkaConsumer<String> gsource = new FlinkKafkaConsumer<>("gmv", new SimpleStringSchema(), Propss.consumer_Props);
        gsource.setStartFromEarliest();
        DataStreamSource<String> gsources = env.addSource(gsource);
        SingleOutputStreamOperator<Gmv> gmv = gsources.map(new MapFunction<String, Gmv>() {
            @Override
            public Gmv map(String s) throws Exception {
                String[] split = s.split(",");
                return new Gmv(Integer.valueOf(split[0]), split[1], split[2], Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5],
                        Integer.valueOf(split[6]), split[7], Integer.valueOf(split[8]));
            }
        });

        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, build);
        tableEnv.createTemporaryView("gmv", gmv);
        Table table = tableEnv.sqlQuery("select sum(final_total_amount) from gmv");
        TableResult execute = table.execute();
        execute.print();

        // 12) Save the result to ClickHouse
        // 13) Verify the result by querying ClickHouse

        env.execute();
    }
}
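
Steps 12 and 13 are only comments in the code above. Below is a minimal sketch of one way to finish them, under assumptions that are not in the original post: ClickHouse is reachable on hadoop106:8123 through the classic JDBC driver (ru.yandex.clickhouse.ClickHouseDriver), and a target table such as gmv_result(user_id Int32, final_total_amount Float64) has already been created. The sink reuses the same RichSinkFunction pattern as HBase_Sink.

package sink;

import bean.Gmv;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

// Hypothetical sketch (not part of the original post): write each Gmv record to a
// ClickHouse table over plain JDBC. Host, database, table and columns are assumptions.
public class ClickHouse_Sink extends RichSinkFunction<Gmv> {
    private Connection conn;
    private PreparedStatement ps;

    @Override
    public void open(Configuration parameters) throws Exception {
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        conn = DriverManager.getConnection("jdbc:clickhouse://hadoop106:8123/default", "default", "");
        ps = conn.prepareStatement("insert into gmv_result (user_id, final_total_amount) values (?, ?)");
    }

    @Override
    public void invoke(Gmv value, Context context) throws Exception {
        // one insert per record; fine for this exercise, batching would be better for volume
        ps.setInt(1, value.getUser_id());
        ps.setDouble(2, value.getFinal_total_amount());
        ps.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        if (ps != null) ps.close();
        if (conn != null) conn.close();
    }
}

It would be wired in right after the gmv stream is built, e.g. gmv.addSink(new ClickHouse_Sink());, and step 13 then amounts to comparing select sum(final_total_amount) from gmv_result in clickhouse-client with the value printed by the Flink SQL query.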

 

 
 