Kafka API Notes
Kafka server.properties configuration file
server.properties
#server.properties
#The broker's globally unique id; must not be repeated
broker.id=0
#Enable topic deletion; in current versions this defaults to true and has been removed from the shipped config file
delete.topic.enable=true
#Number of threads handling network requests
num.network.threads=3
#Number of threads handling disk I/O
num.io.threads=8
#Send buffer size of the socket
socket.send.buffer.bytes=102400
#Receive buffer size of the socket
socket.receive.buffer.bytes=102400
#Maximum size of a request the socket server will accept
socket.request.max.bytes=104857600
#Path where Kafka stores its log (data) files
log.dirs=/opt/module/kafka/datas
#Default number of partitions per topic on this broker
num.partitions=1
#Number of threads per data directory used for log recovery and cleanup
num.recovery.threads.per.data.dir=1
#Maximum time a segment file is retained before it becomes eligible for deletion
log.retention.hours=168
#Maximum size of a log segment file; when reached, a new segment is rolled. Default 1 GB
log.segment.bytes=1073741824
#ZooKeeper cluster connection string
zookeeper.connect=hadoop102:2181,hadoop103:2181,hadoop104:2181/kafka
############################# Group Coordinator Settings #############################
#The setting below specifies how long, in milliseconds, the GroupCoordinator delays the initial consumer rebalance.
#As new members join the group, the rebalance is further delayed by group.initial.rebalance.delay.ms, up to a maximum of max.poll.interval.ms.
#The default value is 3 seconds.
#We override it to 0 here, since that gives a better out-of-the-box experience for development and testing.
#In production, however, the default of 3 seconds is more suitable, as it helps avoid unnecessary and potentially expensive rebalances during application startup.
group.initial.rebalance.delay.ms=0
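These broker settings can also be inspected at runtime through the AdminClient. Below is a minimal sketch, assuming a broker reachable at localhost:9092 with broker.id=0 as above (the address and the class name DescribeBrokerConfig are placeholders, not from the original notes):
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.Config;
import org.apache.kafka.common.config.ConfigResource;
import java.util.Collections;
import java.util.Properties;
public class DescribeBrokerConfig {
public static void main(String[] args) throws Exception {
Properties properties = new Properties();
//Placeholder bootstrap address; point this at a real broker
properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
try (AdminClient adminClient = AdminClient.create(properties)) {
//Ask broker 0 (matching broker.id=0 above) for its effective configuration
ConfigResource broker = new ConfigResource(ConfigResource.Type.BROKER, "0");
Config config = adminClient.describeConfigs(Collections.singleton(broker)).all().get().get(broker);
//Prints every effective setting, e.g. log.retention.hours=168
config.entries().forEach(entry -> System.out.println(entry.name() + "=" + entry.value()));
}
}
}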
Key settings in the Kafka producer configuration file producer.properties:
producer.properties
############################# Producer Basics #############################
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=localhost:9092
# Compression codec for all data generated by the producer: none, gzip, snappy, lz4, zstd
compression.type=none
#Custom partitioner
# name of the partitioner class for partitioning events; default partition spreads data randomly
#partitioner.class=
#How long the producer waits for the server's response to a request it has sent
# the maximum amount of time the client will wait for the response of a request
#request.timeout.ms=
#Controls how long KafkaProducer.send() and KafkaProducer.partitionsFor() may block.
#These methods can block because the buffer is full or metadata is unavailable; blocking in user-supplied serializers or partitioners does not count toward this timeout. Default 60000 ms.
# how long `KafkaProducer.send` and `KafkaProducer.partitionsFor` will block for
#max.block.ms=
#Linger time
# the producer will wait for up to the given delay to allow other records to be sent so that the sends can be batched together
#linger.ms=
#Caps the size of a single request sent to the Kafka server
# the maximum size of a request in bytes
#max.request.size=
#Batch size: how much data one batch collects
# the default batch size in bytes when batching multiple records sent to a partition
#batch.size=
#Buffer size; default 32 MB
# the total bytes of memory the producer can use to buffer records waiting to be sent to the server
#buffer.memory=
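The options above can also be set programmatically. A small sketch (not from the original notes) showing compression.type, max.request.size and max.block.ms set through the ProducerConfig constants; the broker address is a placeholder and the values shown are the defaults:
import org.apache.kafka.clients.producer.ProducerConfig;
import java.util.Properties;
public class ProducerTuningSketch {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
//Compression codec for batches; "none" is the default, gzip/snappy/lz4/zstd are alternatives
properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "none");
//Maximum size of a single request in bytes (default 1048576 = 1 MB)
properties.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 1048576);
//How long send()/partitionsFor() may block before failing (default 60000 ms)
properties.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 60000);
System.out.println(properties);
}
}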
Producer API (all of the parameters configured below are documented in the producer configuration file):
KafkaProducer: the producer object, used to send data
ProducerConfig: provides the full set of configuration parameters
ProducerRecord: every record must be wrapped in a ProducerRecord object
A simple producer
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Properties;
import java.util.concurrent.Future;
public class SimpleProducer {
public static void main(String[] args) {
Properties properties = new Properties();
//Where to send the data
properties.put("bootstrap.servers", "Ava01:9092");
//How much data one batch collects; default 16 KB
properties.put("batch.size", "16384");
//Linger time: a batch is sent once it is full or this delay expires
properties.put("linger.ms", "1");
//Buffer size; default 32 MB
properties.put("buffer.memory", 33554432);
//Key/value serializers; not in the config file, needed to match Kafka's wire format
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
//Create the Kafka producer object from the configuration above
KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
//Send with send(); each record is wrapped in a ProducerRecord
for (int i = 0; i < 100; i++) {
Future<RecordMetadata> future =
kafkaProducer.send(new ProducerRecord<String, String>("test", "test" + i));
}
//Release resources
kafkaProducer.close();
}
}
Producer partitioning strategy, callbacks, synchronous send and asynchronous send:
The producer's partitioning strategy
package CustomProducer;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
/* Producer exercise */
public class ProducerExe {
public static void main(String[] args) {
Properties properties = new Properties();
//Pass settings through the predefined ProducerConfig constants
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.ACKS_CONFIG, "all");
properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384");
properties.setProperty(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
//Number of retries when a send fails
properties.put("retries", 3);
KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
for (int i = 0; i < 26; i++) {
//No partition specified: the default sticky partitioner is used
//ProducerRecord<String, String> producerRecord = new ProducerRecord<>("test", "Exe-0" + i);
//Explicit partition: send the record to a fixed partition of the topic (here partition 0)
//ProducerRecord<String, String> producerRecord1 =new ProducerRecord<String,String>("test",0,"ooo","Exe_0"+i);
//No partition but a key: partition = hash(key) % number of partitions (the default partitioner hashes the serialized key with murmur2)
ProducerRecord<String, String> producerRecord2 = new ProducerRecord<>("test", "zzz", "Exe-01" + i);
kafkaProducer.send(producerRecord2, new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
if (exception == null) {
System.out.println(
producerRecord2.value() + " offset: " + metadata.offset() + " topic: " + metadata.topic() + " partition" +
" " + metadata.partition());
} else {
exception.printStackTrace();
}
}
}); //appending .get() here would turn this into a synchronous send (see the sketch after this class)
}
kafkaProducer.close();
}
}
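For reference, the .get() mentioned in the comment above makes the send synchronous: send() returns a Future<RecordMetadata>, and blocking on it waits for the broker's acknowledgment before the loop continues. A minimal sketch under the same broker/topic assumptions as the notes (the class name SyncSendSketch is hypothetical):
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Properties;
public class SyncSendSketch {
public static void main(String[] args) throws Exception {
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
try (KafkaProducer<String, String> producer = new KafkaProducer<>(properties)) {
for (int i = 0; i < 5; i++) {
//get() blocks until the broker acknowledges this record, so records go out strictly one at a time
RecordMetadata metadata = producer.send(new ProducerRecord<>("test", "sync-" + i)).get();
System.out.println("acked offset " + metadata.offset() + " on partition " + metadata.partition());
}
}
}
}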
Custom partitioner
To implement a custom partitioner:
1. Implement the Partitioner interface
2. Implement its 3 methods: partition, close and configure
3. Write the partition method so that it returns the partition number
MyPartition
package PartitionDIY;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import java.util.Map;
public class MyPartition implements Partitioner {
/**
 * Decide which partition a record goes to based on its key
 *
 * @param topic      topic name
 * @param key        the key passed in by the producer
 * @param keyBytes   serialized byte array of the key
 * @param value      the value
 * @param valueBytes serialized byte array of the value
 * @param cluster    cluster metadata; can be used to look up partition info
 * @return the partition number
 */
@Override
public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
//Guard against records sent without a key, which would otherwise throw a NullPointerException
if (key == null) {
return 0;
}
String string = key.toString();
if (string.contains("zzz")) {
return 1;
} else if (string.contains("jjj")) {
return 2;
} else {
return 0;
}
}
@Override
public void close() {
}
@Override
public void configure(Map<String, ?> configs) {
}
}
Testing the custom partitioner
MypartitionTest
package PartitionDIY;
import org.apache.kafka.clients.producer.*;
import java.util.Properties;
/* Producer test for the custom partitioner */
public class MypartitionTest {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
properties.put(ProducerConfig.ACKS_CONFIG, "all");
properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16384");
properties.setProperty(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
//Number of retries when a send fails
properties.put("retries", 3);
//Specify the custom partitioner to use
properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "PartitionDIY.MyPartition");
KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
for (int i = 0; i < 100; i++) {
ProducerRecord<String, String> record;
if (i % 3 == 0) {
record = new ProducerRecord<>("test", "zzz", "MyPartitionTest--->" + i);
} else if (i % 3 == 1) {
record = new ProducerRecord<>("test", "jjj", "MyPartitionTest--->" + i);
} else {
record = new ProducerRecord<>("test", "qqq", "MyPartitionTest--->" + i);
}
kafkaProducer.send(record, new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
//Only print the metadata when the send succeeded
if (exception == null) {
System.out.println("value: " + record.value() + " offset: " + metadata.offset() + " topic: " + metadata.topic() + " partition" +
" " + metadata.partition());
} else {
exception.printStackTrace();
}
}
});
}
kafkaProducer.close();
}
}
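For the routing above to actually work, the test topic must have at least three partitions. A sketch (not from the original notes) that creates it through the AdminClient, assuming the same Ava01:9092 broker and a replication factor of 1 for a single-broker setup:
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;
import java.util.Collections;
import java.util.Properties;
public class CreateTestTopic {
public static void main(String[] args) throws Exception {
Properties properties = new Properties();
properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
try (AdminClient adminClient = AdminClient.create(properties)) {
//3 partitions so MyPartition can route to 0, 1 and 2; replication factor 1
NewTopic topic = new NewTopic("test", 3, (short) 1);
adminClient.createTopics(Collections.singleton(topic)).all().get();
}
}
}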
Consumer configuration file
Key settings in consumer.properties
#consumer.properties key settings
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=localhost:9092
# consumer group id
group.id=test-consumer-group
# What to do when there is no initial offset in Kafka or if the current
# offset does not exist any more on the server: latest, earliest, none
#auto.offset.reset=
Consumer API
KafkaConsumer: the consumer object, used to poll data
ConsumerConfig: provides the consumer configuration parameters
ConsumerRecords: the data a consumer polls back, a batch of ConsumerRecord objects
A simple consumer
package ConsumerGroup;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Properties;
/* Consumer1 of a shared consumer group */
public class ConsumerExe1 {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
//Offset reset policy; default is latest
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
// Whether to auto-commit offsets
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
// Auto-commit interval; default 5000 ms, controls how often offsets are committed
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "1000");
// Change the partition assignment strategy from the default Range to RoundRobin
properties.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, "org.apache.kafka.clients.consumer.RoundRobinAssignor");
//A consumer created through the API must be given a consumer group
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
ArrayList<String> list = new ArrayList<>();
list.add("test");
//Which topic(s) to subscribe to; several can be subscribed by passing a collection
kafkaConsumer.subscribe(list);
//The consumer must stay open, continuously polling the topic for data.
//Pull model: the poll API periodically requests data from the server. Once the consumer has subscribed,
//polling takes care of all the details, including group coordination, partition rebalancing, heartbeats and fetching,
//so the developer only needs to handle the records returned from the partitions and apply the business logic
while (true) {
ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(Duration.ofSeconds(1));
Iterator<ConsumerRecord<String, String>> iterator = consumerRecords.iterator();
while (iterator.hasNext()) {
ConsumerRecord<String, String> next = iterator.next();
//System.out.println(next);
System.out.println("offset: " + next.offset() + " partition: " + next.partition() + " value: " + next.value()
+ " topic: " + next.topic()
);
}
}
}
}
Consumers that commit offsets manually
Although auto-committing offsets is simple and convenient, it is time-based, so developers have little control over when exactly offsets are committed. Kafka therefore also provides an API for committing offsets manually.
There are two ways to commit offsets manually: commitSync (synchronous) and commitAsync (asynchronous).
They are alike in that both commit the highest offset of the batch returned by the current poll.
They differ in that commitSync blocks the current thread until the commit succeeds and retries automatically on failure (commits can still fail for reasons outside its control), whereas commitAsync has no retry mechanism and may therefore fail to commit.
Manual offset commit: synchronous commit
package CustomerTest;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.time.Duration;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Properties;
/*
Manual offset commit */
public class ConsumerExeByHand {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
//Offset reset policy; default is latest
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
//Disable auto-commit
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
kafkaConsumer.subscribe(Arrays.asList("test"));
while (true) {
ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(Duration.ofSeconds(1));
Iterator<ConsumerRecord<String, String>> iterator = consumerRecords.iterator();
while (iterator.hasNext()) {
ConsumerRecord<String, String> next = iterator.next();
System.out.println(next.toString());
}
//Synchronous commit, once after the whole batch has been processed
kafkaConsumer.commitSync();
}
}
}
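commitSync also has an overload taking an explicit offset map, which allows committing partition by partition instead of the whole poll at once. A sketch of that pattern (assumes a consumer configured as above with auto-commit off; the +1 matters, since the committed offset is the position of the next record to read):
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Collections;
import java.util.List;
public class PerPartitionCommitSketch {
static void consumeOnce(KafkaConsumer<String, String> kafkaConsumer) {
ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(Duration.ofSeconds(1));
for (TopicPartition partition : consumerRecords.partitions()) {
List<ConsumerRecord<String, String>> partitionRecords = consumerRecords.records(partition);
for (ConsumerRecord<String, String> record : partitionRecords) {
System.out.println(record.value());
}
//Commit the offset of the record AFTER the last one processed in this partition
long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
kafkaConsumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
}
}
}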
Manual offset commit: asynchronous commit
package CustomerTest;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.*;
/*
Manual offset commit */
public class ConsumerExeByHand {
public static void main(String[] args) {
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Ava01:9092");
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
//Offset reset policy; default is latest
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
//Disable auto-commit
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
KafkaConsumer<String, String> kafkaConsumer = new KafkaConsumer<>(properties);
kafkaConsumer.subscribe(Arrays.asList("test"));
while (true) {
ConsumerRecords<String, String> consumerRecords = kafkaConsumer.poll(Duration.ofSeconds(1));
Iterator<ConsumerRecord<String, String>> iterator = consumerRecords.iterator();
while (iterator.hasNext()) {
ConsumerRecord<String, String> next = iterator.next();
System.out.println("topic: " + next.topic() + " value: " + next.value() + " partition: " + next.partition());
}
//Synchronous commit
//kafkaConsumer.commitSync();
//Asynchronous commit with a callback, issued once per polled batch
kafkaConsumer.commitAsync(new OffsetCommitCallback() {
@Override
public void onComplete(Map<TopicPartition, OffsetAndMetadata> offsets, Exception exception) {
if (exception == null) {
//Walk through the offsets that were just committed
Set<Map.Entry<TopicPartition, OffsetAndMetadata>> entrySet = offsets.entrySet();
Iterator<Map.Entry<TopicPartition, OffsetAndMetadata>> entryIterator = entrySet.iterator();
while (entryIterator.hasNext()) {
Map.Entry<TopicPartition, OffsetAndMetadata> entry = entryIterator.next();
TopicPartition key = entry.getKey();
OffsetAndMetadata value = entry.getValue();
System.out.println("Callback: " + " topic: " + key.topic() + " partition: " + key.partition() + " offset: " + value.offset());
}
} else {
exception.printStackTrace();
}
}
});
}
}
}
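A common pattern (not from the original notes) combines the two: commitAsync on the hot path for throughput, and a final commitSync on shutdown so the last offsets are not lost. A sketch, assuming a configured consumer with auto-commit off and kafkaConsumer.wakeup() called from another thread (e.g. a shutdown hook) to break out of the poll loop:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;
import java.time.Duration;
public class CombinedCommitSketch {
static void run(KafkaConsumer<String, String> kafkaConsumer) {
try {
while (true) {
ConsumerRecords<String, String> records = kafkaConsumer.poll(Duration.ofSeconds(1));
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.value());
}
//Fast, non-blocking commit on the normal path; a lost commit is covered by the next one
kafkaConsumer.commitAsync();
}
} catch (WakeupException e) {
//Expected on shutdown; fall through to the final synchronous commit
} finally {
try {
//Last chance: block until the final offsets are safely committed
kafkaConsumer.commitSync();
} finally {
kafkaConsumer.close();
}
}
}
}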
Never slack off.