Spring Boot 使用 doris-streamloader 导入数据到 Doris,防止批量更新时事务卡住
背景:
使用 MyBatis 批量实时写入和更新 Doris 时,经常出现连接不上的错误,导致 Kafka 死信队列大量堆积、消费滞后。
https://doris.apache.org/zh-CN/docs/2.0/ecosystem/doris-streamloader/
package com.jiaoda.sentiment.data.etl.service.update; import cn.hutool.core.text.CharSequenceUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.jiaoda.sentiment.data.etl.service.biz.DwdPublicOpinionDataService; import lombok.Data; import lombok.extern.log4j.Log4j2; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpEntity; import org.apache.http.HttpHeaders; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.apache.http.entity.ContentType; import org.apache.http.entity.FileEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.DefaultRedirectStrategy; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import sun.misc.BASE64Encoder; import javax.annotation.PostConstruct; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import static java.util.jar.Pack200.Unpacker.TRUE; /** * @author by jerryjhhe * @description todo * @create 2024/5/22 13:41 */ @Service @Log4j2 public class DorisStreamLoadClient { @Value("${spring.datasource.dynamic.datasource.master.url}") private String dorisIP; private final String user = "root"; private final String password = ""; private final String credentials = user + ":" + password; BASE64Encoder encoder = new BASE64Encoder(); //通过BASE64对账号密码加密 String ticket = encoder.encode(credentials.getBytes()); private final static String DATABASE = "analysis"; // 要导入的数据库 private final static String TABLE = "dwd_public_opinion_data"; // 要导入的表 /* private final String loadUrl = 
String.format("http://%s:%s/api/%s/%s/_stream_load", dorisIP, 8030, DATABASE, TABLE);*/ private String urlTemplateContent = "http://{}:8030/api/{}/{}/_stream_load"; private final CloseableHttpClient client = httpClientBuilder.build(); @PostConstruct public void init() { dorisIP = dorisIP.split(":")[2].replace("//", ""); log.info("DorisStreamLoadClient doris ip :{}", dorisIP); } private final static HttpClientBuilder httpClientBuilder = HttpClients .custom() .setRedirectStrategy(new DefaultRedirectStrategy() { @Override protected boolean isRedirectable(String method) { // 如果连接目标是 FE,则需要处理 307 redirect。 return true; } }); private String basicAuthHeader(String username, String password) { final String tobeEncode = username + ":" + password; byte[] encoded = Base64.encodeBase64(tobeEncode.getBytes(StandardCharsets.UTF_8)); return "Basic " + new String(encoded); } public StreamLoadResult putData(File file, String db, String table) throws IOException { String loadUrl = CharSequenceUtil.format(urlTemplateContent, dorisIP, db, table); try (CloseableHttpClient client = httpClientBuilder.build()) { HttpPut put = new HttpPut(loadUrl); put.setHeader(HttpHeaders.EXPECT, "100-continue"); put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user,password)); // 可以在 Header 中设置 stream load 相关属性,这里我们设置 label 和 column_separator。 put.setHeader("label", "label_" + StrUtil.uuid()); put.setHeader("format", "json"); put.setHeader("Content-Type", ContentType.APPLICATION_JSON.toString()); put.setHeader("strip_outer_array", TRUE); // Array 中的每行数据的字段顺序完全一致。Doris 仅会根据第一行的字段顺序做解析,然后以下标的形式访问之后的数据。该方式可以提升 3-5X 的导入效率。 put.setHeader("fuzzy_parse", TRUE); // put.setHeader("jsonpaths","[\"$.siteid\",\"$.username\"]"); // put.setHeader("columns","siteid,username,doris_update_time=current_timestamp()"); FileEntity entity = new FileEntity(file); put.setEntity(entity); System.out.print(entity); try (CloseableHttpResponse response = client.execute(put)) { response.setHeader(HttpHeaders.AUTHORIZATION, 
basicAuthHeader(user,password)); String loadResult = ""; if (response.getEntity() != null) { loadResult = EntityUtils.toString(response.getEntity()); } final int statusCode = response.getStatusLine().getStatusCode(); if (statusCode != 200) { throw new IOException( String.format("Stream load failed. status: %s load result: %s", statusCode, loadResult)); } log.info("Get load result: {}" , loadResult); return JSON.parseObject(loadResult,StreamLoadResult.class); } } } public static void main(String[] args) throws IOException { DorisStreamLoadClient dorisStreamLoadClient = new DorisStreamLoadClient(); StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File("C:\\home\\doris_stream_load\\update_dwdPublicOpinionData.csv"), "analysis", "dwd_public_opinion_data"); System.out.println(streamLoadResult); } @Data public static class StreamLoadResult { private long Txnid; private String Label; private String Comment; private boolean TwoPhaseCommit; private String Status; private String Message; private long numberTotalRows; private long numberLoadedRows; private long numberFilteredRows; private long numberUnselectedRows; private long loadBytes; private long loadTimeMs; private long beginTxnTimeMs; private long streamLoadPutTimeMs; private long readDataTimeMs; private long writeDataTimeMs; private long commitAndPublishTimeMs; } }
使用:
public void test(Object dwdPublicOpinionDataList){
try {
String jsonString = JSON.toJSONString(dwdPublicOpinionDataList);
FileUtils.write(new File(path), jsonString, "utf-8", true);
DorisStreamLoadClient.StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(path), "analysis", "dwd_public_opinion_data");
if ("Success".equals(streamLoadResult.getStatus())) {
//成功后的逻辑
}
} catch (
IOException e) {
log.error("dorisStreamLoadClient{}失败 :{}", path, e);
} finally {
try {
FileUtils.delete(new File(path));
} catch (IOException e) {
log.error("删除{}失败 :{}", path, e);
return;
}
}