Spring Boot 使用 doris-streamloader(Stream Load)导入数据到 Doris,防止批量更新时事务卡住

背景:

使用 MyBatis 批量实时写入和更新 Doris 时,经常出现连接不上的错误,导致 Kafka 死信队列堆积大量消息、消费滞后。

https://doris.apache.org/zh-CN/docs/2.0/ecosystem/doris-streamloader/

代码:
package com.jiaoda.sentiment.data.etl.service.update;
 
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.jiaoda.sentiment.data.etl.service.biz.DwdPublicOpinionDataService;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.FileEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import sun.misc.BASE64Encoder;
 
import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
 
import static java.util.jar.Pack200.Unpacker.TRUE;
 
/**
 * @author by jerryjhhe
 * @description todo
 * @create 2024/5/22 13:41
 */
@Service
@Log4j2
public class DorisStreamLoadClient {
 
 
    @Value("${spring.datasource.dynamic.datasource.master.url}")
    private String dorisIP;
 
    private final String user = "root";
 
    private final String password = "";
 
    private final String credentials = user + ":" + password;
    BASE64Encoder encoder = new BASE64Encoder();
    //通过BASE64对账号密码加密
    String ticket = encoder.encode(credentials.getBytes());
    private final static String DATABASE = "analysis";   // 要导入的数据库
    private final static String TABLE = "dwd_public_opinion_data";     // 要导入的表
   /* private final  String loadUrl = String.format("http://%s:%s/api/%s/%s/_stream_load",
            dorisIP, 8030, DATABASE, TABLE);*/
    private String urlTemplateContent = "http://{}:8030/api/{}/{}/_stream_load";
 
    private final CloseableHttpClient client = httpClientBuilder.build();
 
 
    @PostConstruct
    public void init() {
        dorisIP = dorisIP.split(":")[2].replace("//", "");
        log.info("DorisStreamLoadClient doris ip :{}", dorisIP);
    }
 
    private final static HttpClientBuilder httpClientBuilder = HttpClients
            .custom()
            .setRedirectStrategy(new DefaultRedirectStrategy() {
                @Override
                protected boolean isRedirectable(String method) {
                    // 如果连接目标是 FE,则需要处理 307 redirect。
                    return true;
                }
            });
 
    private String basicAuthHeader(String username, String password) {
        final String tobeEncode = username + ":" + password;
        byte[] encoded = Base64.encodeBase64(tobeEncode.getBytes(StandardCharsets.UTF_8));
        return "Basic " + new String(encoded);
    }
 
    public StreamLoadResult putData(File file, String db, String table) throws IOException {
        String loadUrl = CharSequenceUtil.format(urlTemplateContent, dorisIP, db, table);
 
        try (CloseableHttpClient client = httpClientBuilder.build()) {
            HttpPut put = new HttpPut(loadUrl);
            put.setHeader(HttpHeaders.EXPECT, "100-continue");
            put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user,password));
 
            // 可以在 Header 中设置 stream load 相关属性,这里我们设置 label 和 column_separator。
            put.setHeader("label", "label_" + StrUtil.uuid());
            put.setHeader("format", "json");
            put.setHeader("Content-Type", ContentType.APPLICATION_JSON.toString());
            put.setHeader("strip_outer_array", TRUE);
            // Array 中的每行数据的字段顺序完全一致。Doris 仅会根据第一行的字段顺序做解析,然后以下标的形式访问之后的数据。该方式可以提升 3-5X 的导入效率。
            put.setHeader("fuzzy_parse", TRUE);
 
//            put.setHeader("jsonpaths","[\"$.siteid\",\"$.username\"]");
//            put.setHeader("columns","siteid,username,doris_update_time=current_timestamp()");
 
            FileEntity entity = new FileEntity(file);
            put.setEntity(entity);
            System.out.print(entity);
 
            try (CloseableHttpResponse response = client.execute(put)) {
                response.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user,password));
 
 
                String loadResult = "";
                if (response.getEntity() != null) {
                    loadResult = EntityUtils.toString(response.getEntity());
                }
 
                final int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    throw new IOException(
                            String.format("Stream load failed. status: %s load result: %s", statusCode, loadResult));
                }
 
               log.info("Get load result: {}" , loadResult);
 
                return JSON.parseObject(loadResult,StreamLoadResult.class);
            }
        }
    }
 
    public static void main(String[] args) throws IOException {
        DorisStreamLoadClient dorisStreamLoadClient = new DorisStreamLoadClient();
        StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File("C:\\home\\doris_stream_load\\update_dwdPublicOpinionData.csv"), "analysis", "dwd_public_opinion_data");
        System.out.println(streamLoadResult);
    }
 
    @Data
    public static class StreamLoadResult {
        private long Txnid;
        private String Label;
        private String Comment;
        private boolean TwoPhaseCommit;
        private String Status;
        private String Message;
        private long numberTotalRows;
        private long numberLoadedRows;
        private long numberFilteredRows;
        private long numberUnselectedRows;
        private long loadBytes;
        private long loadTimeMs;
        private long beginTxnTimeMs;
        private long streamLoadPutTimeMs;
        private long readDataTimeMs;
        private long writeDataTimeMs;
        private long commitAndPublishTimeMs;
    }
}

 使用:


public void test(Object dwdPublicOpinionDataList){

try {
String jsonString = JSON.toJSONString(dwdPublicOpinionDataList);

FileUtils.write(new File(path), jsonString, "utf-8", true);
DorisStreamLoadClient.StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(path), "analysis", "dwd_public_opinion_data");
if ("Success".equals(streamLoadResult.getStatus())) {
//成功后的逻辑
}

} catch (
IOException e) {
log.error("dorisStreamLoadClient{}失败 :{}", path, e);
} finally {
try {
FileUtils.delete(new File(path));
} catch (IOException e) {
log.error("删除{}失败 :{}", path, e);
return;
}
}

  

posted @   夜半钟声到客船  阅读(458)  评论(0编辑  收藏  举报
点击右上角即可分享
微信分享提示