Kafka-生产者/消费者
一 命令行操作Kafka
1 查询topic,进入kafka目录:
./bin/kafka-topics.sh --list --zookeeper localhost:2181
2 查询topic内容:
./bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic topicName --from-beginning
二 Python脚本操作Kafka
1 安装Kafka库
推荐安装:pip install kafka-python (注意:pip install kafka 不兼容 Python 3.8)
2 生产者
# Minimal producer example: publish one JSON-encoded message to a topic.
import json

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers='192.168.2.230:9092')

payload = {
    "operatorId": "test",
    "terminalId": "123",
    "terminalCode": "123",
    "terminalNo": "1"
}
# KafkaProducer.send() asserts the value is bytes/bytearray/memoryview/None;
# passing a plain str fails that assertion, hence the .encode() here.
message = json.dumps(payload).encode()

producer.send('tqs-admin-event-1', message)
producer.close()
print("结束")
2 消费者
# Minimal consumer example: print every record received on the 'test' topic.
from kafka import KafkaConsumer

consumer = KafkaConsumer('test', bootstrap_servers=['172.21.10.136:9092'])

# Iterating the consumer blocks forever, yielding records as they arrive.
for record in consumer:
    print ("%s:%d:%d: key=%s value=%s" % (
        record.topic, record.partition, record.offset,
        record.key, record.value))
3 实际应用
import json
import traceback

from kafka import KafkaProducer, KafkaConsumer
# Bug fix: kafka.errors exposes KafkaError (and subclasses), not a name
# `kafka_errors` -- the original `from kafka.errors import kafka_errors`
# raises ImportError at load time.
from kafka.errors import KafkaError


def producer_demo():
    """Send three JSON-serialized key/value messages to topic 'kafka_demo'.

    The messages need not be key/value pairs in general; JSON is used as the
    serialization format for both key and value here.
    """
    producer = KafkaProducer(
        bootstrap_servers=['localhost:9092'],
        key_serializer=lambda k: json.dumps(k).encode(),
        value_serializer=lambda v: json.dumps(v).encode())
    for i in range(0, 3):
        # Records sharing a key are normally routed to the same partition;
        # here partition=1 also pins the target partition explicitly
        # (the topic must have at least 2 partitions for this to succeed).
        future = producer.send(
            'kafka_demo',
            key='count_num',
            value=str(i),
            partition=1)
        print("send {}".format(str(i)))
        try:
            # Block until the broker acknowledges (or an error is raised).
            future.get(timeout=10)
        except KafkaError:
            # Bug fix: format_exc() only *returns* the traceback string;
            # the original discarded it, silently swallowing failures.
            print(traceback.format_exc())


def consumer_demo():
    """Consume topic 'kafka_demo' in group 'test', printing decoded records."""
    consumer = KafkaConsumer(
        'kafka_demo',
        # Bug fix: was ':9092' (host missing); aligned with producer_demo.
        bootstrap_servers='localhost:9092',
        group_id='test'
    )
    for message in consumer:
        # Keys/values arrive as bytes; decode then JSON-parse them back.
        print("receive, key: {}, value: {}".format(
            json.loads(message.key.decode()),
            json.loads(message.value.decode())
            )
        )
3 查看kafka堆积剩余量
在线环境中,需要保证消费者的消费速度大于生产者的生产速度,所以需要检测kafka中的剩余堆积量是在增加还是减小。可以用如下代码,观测队列消息剩余量:
# Bug fix: TopicPartition (and KafkaConsumer) were used but never imported
# in this snippet, which raises NameError as originally written.
from kafka import KafkaConsumer, TopicPartition

# NOTE(review): `topic` (str) and `kwargs` (KafkaConsumer options such as
# bootstrap_servers / group_id) are assumed to be defined by the surrounding
# code before this snippet runs -- confirm against the caller.
consumer = KafkaConsumer(topic, **kwargs)
partitions = [TopicPartition(topic, p)
              for p in consumer.partitions_for_topic(topic)]
print("start to cal offset:")

# Total: the log-end offset of each partition, i.e. messages produced so far.
toff = consumer.end_offsets(partitions)
toff = [(tp.partition, toff[tp]) for tp in toff.keys()]
toff.sort()
print("total offset: {}".format(str(toff)))

# Current: the consumer group's last committed offset per partition.
# committed() returns None for a partition the group has never committed.
coff = [(tp.partition, consumer.committed(tp)) for tp in partitions]
coff.sort()
print("current offset: {}".format(str(coff)))

# Backlog = produced - consumed. A None committed offset contributes 0 to
# cur_sum, i.e. every message in that partition counts as still pending.
toff_sum = sum(x[1] for x in toff)
cur_sum = sum(x[1] for x in coff if x[1] is not None)
left_sum = toff_sum - cur_sum
print("kafka left: {}".format(left_sum))
本文来自博客园,作者:术科术,转载请注明原文链接:https://www.cnblogs.com/shukeshu/p/16200322.html