ChatGPT usage log

URL: http://chat.yangxiwl.cn/chatbotai/1002?channel=bdtoufangChatztHD3

Write a Java program that consumes data from Kafka and batch-inserts it into a ClickHouse database.

You can use the Kafka Java client library together with the ClickHouse JDBC driver to consume messages from Kafka and batch-insert them into ClickHouse. Here is a simple example program:

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.util.*;

public class KafkaToClickHouse {

    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        // ClickHouse configuration (host, port and database are given in the JDBC URL below)
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        consumer.subscribe(Collections.singletonList("your_topic_name"));

        // ClickHouse data source (the balanced data source requires at least one host in the URL)
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://localhost:8123/your_database_name", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection();
             PreparedStatement stmt = connection.prepareStatement("INSERT INTO your_table_name (col1, col2) VALUES (?, ?)")) {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // assumes each message is two comma-separated columns
                    stmt.setString(1, data[0]);
                    stmt.setString(2, data[1]);
                    stmt.addBatch();
                }
                if (!records.isEmpty()) {
                    stmt.executeBatch();
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // the connection is closed by try-with-resources; the data source itself has no close()
            consumer.close();
        }
    }
}
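
One caveat about the loop above: offsets are auto-committed on a timer by default, so the consumer may mark records as processed before executeBatch() has run, and a crash in between can lose rows. A common refinement is to disable auto-commit and commit offsets only after ClickHouse accepts the batch. A minimal sketch of just the changed parts, reusing the kafkaProps, consumer and stmt from the program above (ENABLE_AUTO_COMMIT_CONFIG and commitSync() are standard Kafka consumer APIs):

// Disable auto-commit so offsets advance only after a successful insert
kafkaProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");

while (true) {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    for (ConsumerRecord<String, String> record : records) {
        String[] data = record.value().split(",");
        stmt.setString(1, data[0]);
        stmt.setString(2, data[1]);
        stmt.addBatch();
    }
    if (!records.isEmpty()) {
        stmt.executeBatch();   // insert first...
        consumer.commitSync(); // ...then mark the records as consumed
    }
}

This gives at-least-once delivery: after a crash the uncommitted records are redelivered and inserted again, which is exactly why the deduplication question below matters.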

 

Here is an example program that groups the Kafka records into batches of 100 and writes each batch to ClickHouse with a single INSERT statement:

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;

public class KafkaToClickHouse {

    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 100); // fetch at most 100 records per poll

        // ClickHouse configuration (host, port and database are given in the JDBC URL below)
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        consumer.subscribe(Collections.singletonList("your_topic_name"));

        // ClickHouse data source (the balanced data source requires at least one host in the URL)
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://localhost:8123/your_database_name", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection()) {
            List<String> batch = new ArrayList<>();
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // assumes each message is two comma-separated columns
                    String row = String.format("('%s', '%s')", data[0], data[1]);
                    batch.add(row);
                }
                if (batch.size() >= 100) {
                    insertBatch(connection, batch);
                    batch.clear();
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // the connection is closed by try-with-resources; the data source itself has no close()
            consumer.close();
        }
    }

    private static void insertBatch(ClickHouseConnection connection, List<String> batch) throws SQLException {
        try (PreparedStatement stmt = connection.prepareStatement("INSERT INTO your_table_name (col1, col2) VALUES " + String.join(",", batch))) {
            stmt.execute();
        }
    }
}
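
A weakness of insertBatch() above: it splices raw values into the SQL string via String.format, so a value containing a quote breaks the statement and opens the door to SQL injection; rows are also only flushed once 100 have accumulated, so a final partial batch stays buffered. A safer sketch of the insert itself, buffering the parsed columns as List<String[]> instead of pre-formatted strings and letting a parameterized batch do the escaping (same placeholder table name as above):

private static void insertBatch(ClickHouseConnection connection, List<String[]> batch) throws SQLException {
    // Placeholders let the JDBC driver escape each value safely
    try (PreparedStatement stmt = connection.prepareStatement(
            "INSERT INTO your_table_name (col1, col2) VALUES (?, ?)")) {
        for (String[] row : batch) {
            stmt.setString(1, row[0]);
            stmt.setString(2, row[1]);
            stmt.addBatch();
        }
        stmt.executeBatch();
    }
}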

How can I do batched inserts while avoiding duplicate rows?

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import ru.yandex.clickhouse.BalancedClickhouseDataSource;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.settings.ClickHouseProperties;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.Duration;
import java.util.*;

public class KafkaToClickHouse {

    public static void main(String[] args) {
        // Kafka configuration
        Properties kafkaProps = new Properties();
        kafkaProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        kafkaProps.put(ConsumerConfig.GROUP_ID_CONFIG, "clickhouse-consumer");
        kafkaProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        kafkaProps.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 100); // fetch at most 100 records per poll

        // ClickHouse configuration (host, port and database are given in the JDBC URL below)
        ClickHouseProperties clickHouseProps = new ClickHouseProperties();
        clickHouseProps.setUser("your_username");
        clickHouseProps.setPassword("your_password");

        // Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(kafkaProps);
        consumer.subscribe(Collections.singletonList("your_topic_name"));

        // ClickHouse data source (the balanced data source requires at least one host in the URL)
        BalancedClickhouseDataSource dataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://localhost:8123/your_database_name", clickHouseProps);

        // Consume and insert data
        try (ClickHouseConnection connection = dataSource.getConnection()) {
            List<String> batch = new ArrayList<>();
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    String[] data = record.value().split(","); // assumes each message is two comma-separated columns
                    String row = String.format("('%s', '%s')", data[0], data[1]);
                    batch.add(row);
                }
                if (batch.size() >= 100) {
                    insertBatch(connection, batch);
                    batch.clear();
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // the connection is closed by try-with-resources; the data source itself has no close()
            consumer.close();
        }
    }

    private static void insertBatch(ClickHouseConnection connection, List<String> batch) throws SQLException {
        try (PreparedStatement stmt = connection.prepareStatement("INSERT INTO your_table_name (col1, col2) VALUES " + String.join(",", batch))) {
            stmt.execute();
        } catch (SQLException e) {
            // Handle a duplicate-insert error (assumes the server reports duplicates with this code)
            if (e.getMessage().contains("Code: 409")) {
                // Duplicate rows: ignore them or handle as needed
            } else {
                throw e;
            }
        }
    }
}
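
Matching on "Code: 409" in the exception message is fragile, and by default ClickHouse does not raise an error for duplicate rows at all. A more robust approach is to deduplicate at the table level, for example with a ReplacingMergeTree engine, which collapses rows sharing the same sorting key during background merges (replicated tables also deduplicate identical insert blocks automatically). A hedged sketch of the one-time table setup through JDBC, with the same placeholder names (uses java.sql.Statement):

// One-time setup: a table engine that deduplicates rows with the same ORDER BY key on merge
try (Statement ddl = connection.createStatement()) {
    ddl.execute(
        "CREATE TABLE IF NOT EXISTS your_table_name (" +
        "  col1 String," +
        "  col2 String" +
        ") ENGINE = ReplacingMergeTree" +
        " ORDER BY (col1, col2)");
}

Because merges run asynchronously, a query that must never see duplicates can read with SELECT ... FROM your_table_name FINAL.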

 
