Spring Boot 使用 doris-streamloader 导入数据到 Doris,防止批量更新时事务卡住

背景:

使用 MyBatis 批量实时写入和更新 Doris 时,经常出现连接不上的错误,导致 Kafka 死信队列堆积大量消息、消费滞后。

https://doris.apache.org/zh-CN/docs/2.0/ecosystem/doris-streamloader/

package com.jiaoda.sentiment.data.etl.service.update;

import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.jiaoda.sentiment.data.etl.service.biz.DwdPublicOpinionDataService;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.FileEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import sun.misc.BASE64Encoder;

import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import static java.util.jar.Pack200.Unpacker.TRUE;

/**
 * Spring service that uploads a local JSON file into an Apache Doris table through the
 * Stream Load HTTP endpoint ({@code PUT http://host:8030/api/{db}/{table}/_stream_load}).
 *
 * <p>The target host is derived in {@link #init()} from the master datasource URL, and every
 * request is sent with HTTP basic authentication.
 *
 * @author by jerryjhhe
 * @create 2024/5/22 13:41
 */
@Service
@Log4j2
public class DorisStreamLoadClient {

    /** Database targeted by the {@link #main(String[])} smoke test. */
    private final static String DATABASE = "analysis";

    /** Table targeted by the {@link #main(String[])} smoke test. */
    private final static String TABLE = "dwd_public_opinion_data";

    /** Stream Load URL; the three {@code {}} placeholders are host, database and table. */
    private static final String URL_TEMPLATE = "http://{}:8030/api/{}/{}/_stream_load";

    /**
     * Value for boolean Stream Load headers. Declared locally so the class no longer depends on
     * the unrelated {@code java.util.jar.Pack200.Unpacker.TRUE} constant.
     */
    private static final String HEADER_TRUE = "true";

    /** Shared builder whose clients follow redirects for every HTTP method, including PUT. */
    private final static HttpClientBuilder httpClientBuilder = HttpClients
            .custom()
            .setRedirectStrategy(new DefaultRedirectStrategy() {
                @Override
                protected boolean isRedirectable(String method) {
                    // When the request targets an FE node, the 307 redirect must be followed.
                    return true;
                }
            });

    /** Holds the raw datasource URL until {@link #init()} replaces it with the bare host. */
    @Value("${spring.datasource.dynamic.datasource.master.url}")
    private String dorisIP;

    // NOTE(review): credentials are hard-coded; consider moving them to configuration.
    private final String user = "root";

    private final String password = "";

    private final String credentials = user + ":" + password;

    /** Base64 of {@code user:password} (Base64 is an encoding, not encryption). */
    String ticket = encodeBase64(credentials);

    /**
     * Replaces the injected datasource URL with the host name used for Stream Load requests.
     *
     * @throws IllegalStateException if no host can be derived from the configured value
     */
    @PostConstruct
    public void init() {
        dorisIP = extractHost(dorisIP);
        log.info("DorisStreamLoadClient doris ip :{}", dorisIP);
    }

    /**
     * Extracts the host from a URL such as {@code jdbc:mysql://10.0.0.1:9030/db}. A value
     * without {@code //} is treated as an already bare {@code host[:port]}.
     */
    private static String extractHost(String url) {
        if (url == null || url.trim().isEmpty()) {
            throw new IllegalStateException("Doris datasource url is empty, cannot derive the stream load host");
        }
        String rest = url.trim();
        int schemeEnd = rest.indexOf("//");
        if (schemeEnd >= 0) {
            rest = rest.substring(schemeEnd + 2);
        }
        // The host ends at the first port, path, query or host-list separator.
        int end = rest.length();
        for (int i = 0; i < rest.length(); i++) {
            char c = rest.charAt(i);
            if (c == ':' || c == '/' || c == '?' || c == ',') {
                end = i;
                break;
            }
        }
        String host = rest.substring(0, end);
        if (host.isEmpty()) {
            throw new IllegalStateException("Cannot derive Doris host from url: " + url);
        }
        return host;
    }

    /** Base64-encodes {@code text} as UTF-8 without line breaks. */
    private static String encodeBase64(String text) {
        byte[] encoded = Base64.encodeBase64(text.getBytes(StandardCharsets.UTF_8));
        return new String(encoded, StandardCharsets.UTF_8);
    }

    /** Builds the value of the HTTP {@code Authorization} header for basic authentication. */
    private String basicAuthHeader(String username, String password) {
        return "Basic " + encodeBase64(username + ":" + password);
    }

    /**
     * Uploads {@code file} to {@code db.table} with a single Stream Load request.
     *
     * <p>The file is sent as JSON with {@code strip_outer_array} and {@code fuzzy_parse}
     * enabled, so it is expected to contain one JSON array of objects. HTTP 200 only means the
     * request was processed; callers still have to check {@link StreamLoadResult#getStatus()}.
     *
     * @param file  local file holding the JSON payload
     * @param db    target database
     * @param table target table
     * @return the parsed Stream Load response, never {@code null}
     * @throws IOException if the request fails, the HTTP status is not 200, or the response body
     *                     is empty
     */
    public StreamLoadResult putData(File file, String db, String table) throws IOException {
        String loadUrl = CharSequenceUtil.format(URL_TEMPLATE, dorisIP, db, table);

        // One short-lived client per call; try-with-resources releases its connections.
        try (CloseableHttpClient client = httpClientBuilder.build()) {
            HttpPut put = new HttpPut(loadUrl);
            put.setHeader(HttpHeaders.EXPECT, "100-continue");
            put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user, password));

            // Stream Load properties are passed as request headers.
            put.setHeader("label", "label_" + StrUtil.uuid());
            put.setHeader("format", "json");
            put.setHeader("Content-Type", ContentType.APPLICATION_JSON.toString());
            put.setHeader("strip_outer_array", HEADER_TRUE);
            // Every row in the array has the same field order: Doris parses only the first row's
            // layout and reads later rows by index, which is reported to load 3-5x faster.
            put.setHeader("fuzzy_parse", HEADER_TRUE);

            put.setEntity(new FileEntity(file));
            log.debug("Stream load {} -> {}", file, loadUrl);

            try (CloseableHttpResponse response = client.execute(put)) {
                HttpEntity body = response.getEntity();
                // UTF-8 is the fallback when the response declares no charset.
                String loadResult = body == null ? "" : EntityUtils.toString(body, StandardCharsets.UTF_8);

                final int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    throw new IOException(
                            String.format("Stream load failed. status: %s load result: %s", statusCode, loadResult));
                }

                log.info("Get load result: {}", loadResult);

                StreamLoadResult result = JSON.parseObject(loadResult, StreamLoadResult.class);
                if (result == null) {
                    // An empty body parses to null; fail here instead of with an NPE in the caller.
                    throw new IOException("Stream load returned an empty response body. status: " + statusCode);
                }
                return result;
            }
        }
    }

    /**
     * Manual smoke test that runs outside the Spring container.
     *
     * <p>Nothing is injected here, so the Doris host has to be supplied as {@code args[0]}
     * (bare host or JDBC URL); without it the request URL contains no usable host. An optional
     * {@code args[1]} overrides the default file path.
     */
    public static void main(String[] args) throws IOException {
        DorisStreamLoadClient dorisStreamLoadClient = new DorisStreamLoadClient();
        if (args.length > 0) {
            dorisStreamLoadClient.dorisIP = args[0];
            dorisStreamLoadClient.init();
        }
        String filePath = args.length > 1
                ? args[1]
                : "C:\\home\\doris_stream_load\\update_dwdPublicOpinionData.csv";
        StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(filePath), DATABASE, TABLE);
        System.out.println(streamLoadResult);
    }

    /**
     * Response body of a Stream Load request, deserialized by fastjson.
     *
     * <p>NOTE(review): presumably populated through fastjson's case-insensitive key matching —
     * confirm against an actual Doris response.
     */
    @Data
    public static class StreamLoadResult {
        private long Txnid;
        private String Label;
        private String Comment;
        private boolean TwoPhaseCommit;
        /** Load outcome; the usage example treats {@code "Success"} as success. */
        private String Status;
        private String Message;
        private long numberTotalRows;
        private long numberLoadedRows;
        private long numberFilteredRows;
        private long numberUnselectedRows;
        private long loadBytes;
        private long loadTimeMs;
        private long beginTxnTimeMs;
        private long streamLoadPutTimeMs;
        private long readDataTimeMs;
        private long writeDataTimeMs;
        private long commitAndPublishTimeMs;
    }
}

 使用:


public void test(Object dwdPublicOpinionDataList){

try {
String jsonString = JSON.toJSONString(dwdPublicOpinionDataList);

FileUtils.write(new File(path), jsonString, "utf-8", true);
DorisStreamLoadClient.StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(path), "analysis", "dwd_public_opinion_data");
if ("Success".equals(streamLoadResult.getStatus())) {
//成功后的逻辑
}

} catch (
IOException e) {
log.error("dorisStreamLoadClient{}失败 :{}", path, e);
} finally {
try {
FileUtils.delete(new File(path));
} catch (IOException e) {
log.error("删除{}失败 :{}", path, e);
return;
}
}

  

posted @ 2024-06-18 18:06  夜半钟声到客船  阅读(230)  评论(0编辑  收藏  举报