Operating Kafka from Python
Two modules can be used to work with Kafka data: the kafka module (the kafka-python package) and the pykafka module.
Note that Kafka may hand broker addresses back as domain names, so make sure the host-to-name mappings are configured in the /etc/hosts file of both the Docker container and the host machine.
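For example (the broker hostname kafka-broker-1 and the image name below are hypothetical), the mapping must exist both on the host and inside the container; a minimal sketch:

echo "192.168.3.145  kafka-broker-1" >> /etc/hosts   # on the host machine

# inside a container: edit its /etc/hosts the same way, or pass the
# mapping at start-up with Docker's --add-host flag
docker run --add-host kafka-broker-1:192.168.3.145 my-consumer-image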
1. The kafka module
Supported versions: the kafka-python client is compatible with a range of broker versions; the example below pins api_version=(0, 9) to match the broker.
from kafka import KafkaConsumer
import time

kafka_hosts = ["192.168.3.145:49154"]
kafka_topic = "topic1"

# api_version must be set, otherwise errors may be raised;
# if SSL authentication is not used, set ssl_check_hostname to False
consumer = KafkaConsumer(kafka_topic,
                         group_id="test_aaa",
                         bootstrap_servers=kafka_hosts,
                         ssl_check_hostname=False,
                         api_version=(0, 9))

# Approach 1: poll manually
print("t1", time.time())  # record the pull time
while True:
    print("t2", time.time())
    # fetch messages from Kafka: wait up to 0.1 s per poll, at most 5 records each time
    msg = consumer.poll(timeout_ms=100, max_records=5)
    print(len(msg))
    for i in msg.values():
        for k in i:
            print(k.offset, k.value)
    time.sleep(1)

# Approach 2: iterate over the consumer
# (for the 30 s behaviour, pass consumer_timeout_ms=30000 to KafkaConsumer;
# without it the iterator blocks forever and the else branch below never runs)
for message in consumer:
    if message is not None:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value.decode()))
    else:
        print("No data received within 30 s")
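The kafka module can produce as well; since the producer section below only covers pykafka, here is a minimal kafka-python producer sketch, assuming the same broker address and topic as the consumer above:

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=["192.168.3.145:49154"],
                         api_version=(0, 9))  # same version pin as the consumer above
# send() is asynchronous and returns a future; get() blocks until the broker responds
future = producer.send("topic1", b"hello kafka")
metadata = future.get(timeout=10)
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()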
2. The pykafka module: consumer
from pykafka import KafkaClient


class KafkaTest(object):
    def __init__(self, host):
        self.host = host
        self.client = KafkaClient(hosts=self.host)
        print(self.client.topics)   # all topics
        print(self.client.brokers)  # all Kafka brokers

    def balance_consumer(self, topic, offset=0):
        """
        Consume Kafka with a balanced consumer.
        :return:
        """
        topic = self.client.topics[topic.encode()]
        # With managed=True the new-style rebalance is used and ZooKeeper is not needed;
        # with managed=False rebalancing goes through ZooKeeper, so zookeeper_connect
        # must be specified, e.g. zookeeper_connect="zookeeperIp", consumer_group='test_group'
        consumer = topic.get_balanced_consumer(
            auto_commit_enable=True,
            managed=True,
            # managed=False,
            # zookeeper_connect="10.111.64.225:2181",
            consumer_group=b'HpDailyDataConsumerGroup',
            consumer_timeout_ms=300000
        )
        partitions = topic.partitions
        print("All partitions: {}".format(partitions))
        earliest_offsets = topic.earliest_available_offsets()
        print("Earliest available offsets: {}".format(earliest_offsets))
        last_offsets = topic.latest_available_offsets()
        print("Latest available offsets: {}".format(last_offsets))
        offset = consumer.held_offsets
        print("Current partition offsets for this consumer: {}".format(offset))
        while True:
            msg = consumer.consume()
            if msg:
                offset = consumer.held_offsets
                print("Current offsets: {}".format(offset))
                # result.append(eval(msg.value.decode()))
                print(msg.value.decode())
                consumer.commit_offsets()  # commit the offsets
            else:
                print("No data")


if __name__ == '__main__':
    host = '192.168.3.145:49154'
    # host = 'c6140sv02:6667'
    kafka_ins = KafkaTest(host)
    topic = 'topic1'
    # topic = 'topic_jsonctr_collector_ws_data_daily'
    kafka_ins.balance_consumer(topic)
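When group rebalancing is not needed, pykafka also provides a simple consumer that reads the partitions directly. A minimal sketch under the same host/topic assumptions as above (the group name is made up):

from pykafka import KafkaClient

client = KafkaClient(hosts='192.168.3.145:49154')
topic = client.topics[b'topic1']
consumer = topic.get_simple_consumer(consumer_group=b'simple_group',  # hypothetical group name
                                     auto_commit_enable=True,
                                     consumer_timeout_ms=5000)  # stop iterating after 5 s of silence
for msg in consumer:
    print(msg.offset, msg.value.decode())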
3. The pykafka module: producer
import logging
import traceback

from common.config import KAFKA_HOST
from pykafka import KafkaClient

logger = logging.getLogger(__name__)  # stand-in for the project's logger


def send_data_kafka(send_value, pro_topic):
    # Send data to Kafka
    try:
        client = KafkaClient(hosts=KAFKA_HOST)
        topic = client.topics[pro_topic.encode()]  # pykafka topic names are bytes
        send_value = str(send_value)
        # sync_producer = topic.get_sync_producer()  # synchronous producer
        producer = topic.get_producer()  # asynchronous producer (pykafka default)
        producer.produce(bytes(send_value, encoding='utf-8'))
        producer.stop()
        logger.info("Data sent to Kafka successfully")
    except Exception as e:
        logger.error("Failed to produce Kafka data: %s, %s"
                     % (e.__traceback__.tb_lineno, traceback.format_exc()))
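Because get_producer() is asynchronous by default, produce() returns before the broker acknowledges delivery, and stop() flushes the pending queue; creating a client per call is simple but costly, so a long-lived producer is preferable at high message rates. A hypothetical call, assuming KAFKA_HOST in common.config holds a "host:port" string:

send_data_kafka({"device": "t1", "value": 23.5}, "topic1")  # payload and topic are made up for illustration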