Operating Kafka from Python
Two modules can be used to work with Kafka data: the kafka module (the kafka-python package) and the pykafka module.
Note that Kafka may hand broker addresses back as domain names, so make sure the host-to-name mappings are configured in the /etc/hosts file of both the Docker container and the host machine.
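For example (the broker hostname kafka-broker-1 and the image name below are hypothetical), the mapping must exist both on the host and inside the container; a minimal sketch:

echo "192.168.3.145  kafka-broker-1" >> /etc/hosts   # on the host machine

# inside a container: edit its /etc/hosts the same way, or pass the
# mapping at start-up with Docker's --add-host flag
docker run --add-host kafka-broker-1:192.168.3.145 my-consumer-image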
1. The kafka module
Supported versions: the kafka-python client is compatible with a range of broker versions; the example below pins api_version=(0, 9) to match the broker.
from kafka import KafkaConsumer
import time

kafka_hosts = ["192.168.3.145:49154"]
kafka_topic = "topic1"

# api_version must be set, otherwise errors may be raised;
# if SSL authentication is not used, set ssl_check_hostname to False
consumer = KafkaConsumer(kafka_topic,
                         group_id="test_aaa",
                         bootstrap_servers=kafka_hosts,
                         ssl_check_hostname=False,
                         api_version=(0, 9))

# Approach 1: poll manually
print("t1", time.time())  # record the pull time
while True:
    print("t2", time.time())
    # fetch messages from Kafka: wait up to 0.1 s per poll, at most 5 records each time
    msg = consumer.poll(timeout_ms=100, max_records=5)
    print(len(msg))
    for i in msg.values():
        for k in i:
            print(k.offset, k.value)
    time.sleep(1)

# Approach 2: iterate over the consumer
# (for the 30 s behaviour, pass consumer_timeout_ms=30000 to KafkaConsumer;
# without it the iterator blocks forever and the else branch below never runs)
for message in consumer:
    if message is not None:
        print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value.decode()))
    else:
        print("No data received within 30 s")
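The kafka module can produce as well; since the producer section below only covers pykafka, here is a minimal kafka-python producer sketch, assuming the same broker address and topic as the consumer above:

from kafka import KafkaProducer

producer = KafkaProducer(bootstrap_servers=["192.168.3.145:49154"],
                         api_version=(0, 9))  # same version pin as the consumer above
# send() is asynchronous and returns a future; get() blocks until the broker responds
future = producer.send("topic1", b"hello kafka")
metadata = future.get(timeout=10)
print(metadata.topic, metadata.partition, metadata.offset)
producer.close()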
2. The pykafka module: consumer
from pykafka import KafkaClient


class KafkaTest(object):
    def __init__(self, host):
        self.host = host
        self.client = KafkaClient(hosts=self.host)
        print(self.client.topics)   # all topics
        print(self.client.brokers)  # all Kafka brokers

    def balance_consumer(self, topic, offset=0):
        """
        Consume Kafka with a balanced consumer.
        :return:
        """
        topic = self.client.topics[topic.encode()]
        # With managed=True the new-style rebalance is used and ZooKeeper is not needed;
        # with managed=False rebalancing goes through ZooKeeper, so zookeeper_connect
        # must be specified, e.g. zookeeper_connect="zookeeperIp", consumer_group='test_group'
        consumer = topic.get_balanced_consumer(
            auto_commit_enable=True,
            managed=True,
            # managed=False,
            # zookeeper_connect="10.111.64.225:2181",
            consumer_group=b'HpDailyDataConsumerGroup',
            consumer_timeout_ms=300000
        )
        partitions = topic.partitions
        print("All partitions: {}".format(partitions))
        earliest_offsets = topic.earliest_available_offsets()
        print("Earliest available offsets: {}".format(earliest_offsets))
        last_offsets = topic.latest_available_offsets()
        print("Latest available offsets: {}".format(last_offsets))
        offset = consumer.held_offsets
        print("Current partition offsets for this consumer: {}".format(offset))
        while True:
            msg = consumer.consume()
            if msg:
                offset = consumer.held_offsets
                print("Current offsets: {}".format(offset))
                # result.append(eval(msg.value.decode()))
                print(msg.value.decode())
                consumer.commit_offsets()  # commit the offsets
            else:
                print("No data")


if __name__ == '__main__':
    host = '192.168.3.145:49154'
    # host = 'c6140sv02:6667'
    kafka_ins = KafkaTest(host)
    topic = 'topic1'
    # topic = 'topic_jsonctr_collector_ws_data_daily'
    kafka_ins.balance_consumer(topic)
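When group rebalancing is not needed, pykafka also provides a simple consumer that reads the partitions directly. A minimal sketch under the same host/topic assumptions as above (the group name is made up):

from pykafka import KafkaClient

client = KafkaClient(hosts='192.168.3.145:49154')
topic = client.topics[b'topic1']
consumer = topic.get_simple_consumer(consumer_group=b'simple_group',  # hypothetical group name
                                     auto_commit_enable=True,
                                     consumer_timeout_ms=5000)  # stop iterating after 5 s of silence
for msg in consumer:
    print(msg.offset, msg.value.decode())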
3. The pykafka module: producer
import logging
import traceback

from common.config import KAFKA_HOST
from pykafka import KafkaClient

logger = logging.getLogger(__name__)  # stand-in for the project's logger


def send_data_kafka(send_value, pro_topic):
    # Send data to Kafka
    try:
        client = KafkaClient(hosts=KAFKA_HOST)
        topic = client.topics[pro_topic.encode()]  # pykafka topic names are bytes
        send_value = str(send_value)
        # sync_producer = topic.get_sync_producer()  # synchronous producer
        producer = topic.get_producer()  # asynchronous producer (pykafka default)
        producer.produce(bytes(send_value, encoding='utf-8'))
        producer.stop()
        logger.info("Data sent to Kafka successfully")
    except Exception as e:
        logger.error("Failed to produce Kafka data: %s, %s"
                     % (e.__traceback__.tb_lineno, traceback.format_exc()))
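Because get_producer() is asynchronous by default, produce() returns before the broker acknowledges delivery, and stop() flushes the pending queue; creating a client per call is simple but costly, so a long-lived producer is preferable at high message rates. A hypothetical call, assuming KAFKA_HOST in common.config holds a "host:port" string:

send_data_kafka({"device": "t1", "value": 23.5}, "topic1")  # payload and topic are made up for illustration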