Spring Boot 使用 doris-streamloader 写入 Doris,防止批量更新时事务卡住
背景:
使用 MyBatis 批量实时写入和更新 Doris 时,经常出现连接不上的错误,导致 Kafka 死信队列大量堆积、消费滞后
https://doris.apache.org/zh-CN/docs/2.0/ecosystem/doris-streamloader/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | package com.jiaoda.sentiment.data.etl.service.update; import cn.hutool.core.text.CharSequenceUtil; import cn.hutool.core.util.StrUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.jiaoda.sentiment.data.etl.service.biz.DwdPublicOpinionDataService; import lombok.Data; import lombok.extern.log4j.Log4j2; import org.apache.commons.codec.binary.Base64; import org.apache.http.HttpEntity; import org.apache.http.HttpHeaders; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.apache.http.entity.ContentType; import org.apache.http.entity.FileEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.DefaultRedirectStrategy; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; import sun.misc.BASE64Encoder; import javax.annotation.PostConstruct; import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import static java.util.jar.Pack200.Unpacker.TRUE; /** * @author by jerryjhhe * @description todo * @create 2024/5/22 13:41 */ @Service @Log4j2 public class DorisStreamLoadClient { @Value ( 
"${spring.datasource.dynamic.datasource.master.url}" ) private String dorisIP; private final String user = "root" ; private final String password = "" ; private final String credentials = user + ":" + password; BASE64Encoder encoder = new BASE64Encoder(); //通过BASE64对账号密码加密 String ticket = encoder.encode(credentials.getBytes()); private final static String DATABASE = "analysis" ; // 要导入的数据库 private final static String TABLE = "dwd_public_opinion_data" ; // 要导入的表 /* private final String loadUrl = String.format("http://%s:%s/api/%s/%s/_stream_load", dorisIP, 8030, DATABASE, TABLE);*/ private String urlTemplateContent = "http://{}:8030/api/{}/{}/_stream_load" ; private final CloseableHttpClient client = httpClientBuilder.build(); @PostConstruct public void init() { dorisIP = dorisIP.split( ":" )[ 2 ].replace( "//" , "" ); log.info( "DorisStreamLoadClient doris ip :{}" , dorisIP); } private final static HttpClientBuilder httpClientBuilder = HttpClients .custom() .setRedirectStrategy( new DefaultRedirectStrategy() { @Override protected boolean isRedirectable(String method) { // 如果连接目标是 FE,则需要处理 307 redirect。 return true ; } }); private String basicAuthHeader(String username, String password) { final String tobeEncode = username + ":" + password; byte [] encoded = Base64.encodeBase64(tobeEncode.getBytes(StandardCharsets.UTF_8)); return "Basic " + new String(encoded); } public StreamLoadResult putData(File file, String db, String table) throws IOException { String loadUrl = CharSequenceUtil.format(urlTemplateContent, dorisIP, db, table); try (CloseableHttpClient client = httpClientBuilder.build()) { HttpPut put = new HttpPut(loadUrl); put.setHeader(HttpHeaders.EXPECT, "100-continue" ); put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user,password)); // 可以在 Header 中设置 stream load 相关属性,这里我们设置 label 和 column_separator。 put.setHeader( "label" , "label_" + StrUtil.uuid()); put.setHeader( "format" , "json" ); put.setHeader( "Content-Type" , 
ContentType.APPLICATION_JSON.toString()); put.setHeader( "strip_outer_array" , TRUE); // Array 中的每行数据的字段顺序完全一致。Doris 仅会根据第一行的字段顺序做解析,然后以下标的形式访问之后的数据。该方式可以提升 3-5X 的导入效率。 put.setHeader( "fuzzy_parse" , TRUE); // put.setHeader("jsonpaths","[\"$.siteid\",\"$.username\"]"); // put.setHeader("columns","siteid,username,doris_update_time=current_timestamp()"); FileEntity entity = new FileEntity(file); put.setEntity(entity); System.out.print(entity); try (CloseableHttpResponse response = client.execute(put)) { response.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user,password)); String loadResult = "" ; if (response.getEntity() != null ) { loadResult = EntityUtils.toString(response.getEntity()); } final int statusCode = response.getStatusLine().getStatusCode(); if (statusCode != 200 ) { throw new IOException( String.format( "Stream load failed. status: %s load result: %s" , statusCode, loadResult)); } log.info( "Get load result: {}" , loadResult); return JSON.parseObject(loadResult,StreamLoadResult. class ); } } } public static void main(String[] args) throws IOException { DorisStreamLoadClient dorisStreamLoadClient = new DorisStreamLoadClient(); StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData( new File( "C:\\home\\doris_stream_load\\update_dwdPublicOpinionData.csv" ), "analysis" , "dwd_public_opinion_data" ); System.out.println(streamLoadResult); } @Data public static class StreamLoadResult { private long Txnid; private String Label; private String Comment; private boolean TwoPhaseCommit; private String Status; private String Message; private long numberTotalRows; private long numberLoadedRows; private long numberFilteredRows; private long numberUnselectedRows; private long loadBytes; private long loadTimeMs; private long beginTxnTimeMs; private long streamLoadPutTimeMs; private long readDataTimeMs; private long writeDataTimeMs; private long commitAndPublishTimeMs; } } |
使用:
public void test(Object dwdPublicOpinionDataList){
try {
String jsonString = JSON.toJSONString(dwdPublicOpinionDataList);
FileUtils.write(new File(path), jsonString, "utf-8", true);
DorisStreamLoadClient.StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(path), "analysis", "dwd_public_opinion_data");
if ("Success".equals(streamLoadResult.getStatus())) {
//成功后的逻辑
}
} catch (
IOException e) {
log.error("dorisStreamLoadClient{}失败 :{}", path, e);
} finally {
try {
FileUtils.delete(new File(path));
} catch (IOException e) {
log.error("删除{}失败 :{}", path, e);
return;
}
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步