Flume configuration
# 1. Spooldir Source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = PATH  # directory to watch, e.g. d:/home/soft
# 2. NetCat Source
a1.sources.r1.type = netcat
a1.sources.r1.bind = host  # hostname or IP to listen on
a1.sources.r1.port = 7777
# 3. Avro Source
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444
# 4. HTTP Source
a1.sources.r1.type = http
a1.sources.r1.port = 6666  # any free port in the valid range (1-65535)
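Each snippet above defines only the source; a runnable agent also needs a channel and a sink. A minimal sketch, assuming a memory channel and a logger sink for local testing (capacity values are illustrative):

a1.sources = r1
a1.channels = c1
a1.sinks = k1

# NetCat Source listening on all interfaces
a1.sources.r1.type = netcat
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 7777

# In-memory channel buffering events between source and sink
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Logger sink prints events to the Flume log/console
a1.sinks.k1.type = logger

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1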
vim flume-kafka.conf
#########################
# Define the component names the agent requires; the agent in this configuration file is named a1
a1.sources=r1
a1.channels=c1 c2
# Define the Source configuration
# Use the Taildir Source
a1.sources.r1.type = TAILDIR
# File in which the Taildir Source checkpoints its read positions, so tailing resumes after a restart
a1.sources.r1.positionFile = /opt/module/flume-1.9.0-bin/exam/log_position.json
# Define the file group(s) to monitor
a1.sources.r1.filegroups = f1
# Files matched by group f1 (regex; multiple groups and paths may be configured)
a1.sources.r1.filegroups.f1 = /opt/app/exam/app.+
# Channel(s) this source sends events to
a1.sources.r1.channels = c1
# Configure the channel
# Use a Kafka Channel: events are stored directly in a Kafka topic, so no separate sink is needed
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
# Kafka cluster broker list
a1.channels.c1.kafka.bootstrap.servers = hadoop011:9092,hadoop012:9092,hadoop013:9092
# Topic this channel writes to; it must be created in Kafka beforehand
a1.channels.c1.kafka.topic = topic_exam
# Write only the event body to Kafka, without wrapping it in the Flume Avro event format (headers are dropped)
a1.channels.c1.parseAsFlumeEvent = false
# Consumer group for this Kafka Channel; to drive a multiplexing channel selector, both Kafka Channels should share the same consumer group
a1.channels.c1.kafka.consumer.group.id = exam-consumer
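The agent above declares a second channel c2 that is not configured here. A sketch of what it could look like, mirroring c1 with its own topic, together with the multiplexing selector the comment above refers to (the topic name topic_exam2, the header name logType, and the mapping values are all illustrative assumptions):

a1.channels.c2.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c2.kafka.bootstrap.servers = hadoop011:9092,hadoop012:9092,hadoop013:9092
a1.channels.c2.kafka.topic = topic_exam2
a1.channels.c2.parseAsFlumeEvent = false
# Same consumer group as c1, as the note above requires
a1.channels.c2.kafka.consumer.group.id = exam-consumer

# Multiplexing selector: route events to c1 or c2 by the value of an event header
a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = logType
a1.sources.r1.selector.mapping.exam = c1
a1.sources.r1.selector.mapping.other = c2
# With two channels in play, the source binds to both (replacing the single-channel binding above)
a1.sources.r1.channels = c1 c2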
Start Kafka
# Start in daemon (background) mode
./bin/kafka-server-start.sh -daemon ./config/server.properties
# To stop: ./bin/kafka-server-stop.sh
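To confirm the broker came up, jps (from the JDK) lists the Kafka JVM process:

jps | grep Kafka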
# Create a topic
kafka-topics.sh --create --partitions 3 --replication-factor 2 --topic mytopic --zookeeper master:2181,slave1:2181,slave2:2181
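With the same ZooKeeper quorum, the new topic's partition and replica layout can be checked:

kafka-topics.sh --describe --topic mytopic --zookeeper master:2181,slave1:2181,slave2:2181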
# Start a console producer
kafka-console-producer.sh --broker-list master:9092,slave1:9092,slave2:9092 --topic mytopic
# Start a console consumer
kafka-console-consumer.sh \
  --zookeeper master:2181,slave1:2181,slave2:2181 \
  --topic mytopic \
  --from-beginning
Start Flume
./bin/flume-ng agent -c ./conf -f ./conf/flume-kafka.conf -n a1 -Dflume.root.logger=INFO,console
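With the agent running, a quick end-to-end check: append a line to a file matching the Taildir pattern and watch it arrive in a console consumer on topic_exam (app.log is an illustrative filename matching /opt/app/exam/app.+):

echo "hello flume" >> /opt/app/exam/app.log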
Start nc (for testing the NetCat Source)
nc master 7777