TAILDIR agent startup:
bin/flume-ng agent --conf conf --conf-file job/taildirSource-kafka.conf --name a1 -Dflume.root.logger=INFO,console
taildirSource-kafka.conf
# TAILDIR source + Kafka channel, see http://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html#kafka-channel

# Name the components of this agent (with a Kafka channel, no sink is needed)
a1.sources = r1
a1.channels = c1

# Declare the source
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = f1
# Files to monitor, e.g. /log/app.*
a1.sources.r1.filegroups.f1 = /home/sea/Desktop/xx/log/.*log.*
# Position file for resuming after restart; the feature is on by default even if unset
a1.sources.r1.positionFile = /home/sea/Desktop/xx/history/taildir_position.json
a1.sources.r1.fileHeader = true

# Kafka channel
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = hadoop001:9092,hadoop004:9092,hadoop005:9092
a1.channels.c1.kafka.topic = topic_log
a1.channels.c1.parseAsFlumeEvent = false
# Producer acks = 1 (leader acknowledgement only)
a1.channels.c1.kafka.producer.acks = 1
# Producer batch size = 1000 (bytes)
a1.channels.c1.kafka.producer.batch.size = 1000
a1.channels.c1.kafka.producer.linger.ms = 5000

# Bind the source to the channel
a1.sources.r1.channels = c1
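To check that events actually reach the channel's topic, Kafka's own console consumer can be used. A minimal sketch, assuming it is run from the Kafka installation directory and that topic_log may still need to be created (the kafka-topics.sh syntax below assumes Kafka >= 2.2; the partition and replication counts are illustrative, not from this setup):

# Hypothetical topic creation (skip if topic_log already exists)
bin/kafka-topics.sh --bootstrap-server hadoop001:9092 --create --topic topic_log --partitions 3 --replication-factor 2
# Consume from the beginning to confirm the agent is producing
bin/kafka-console-consumer.sh --bootstrap-server hadoop001:9092 --topic topic_log --from-beginning

Because parseAsFlumeEvent = false, the consumed messages are the raw log lines rather than Avro-serialized Flume events.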
tail -F (exec source):
1) Configure Flume (flume-kafka.conf)

# define
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F -c +0 /opt/module/datas/flume.log
a1.sources.r1.shell = /bin/bash -c

# sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sinks.k1.kafka.topic = first
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
a1.sinks.k1.kafka.producer.linger.ms = 1

# channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# bind
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

2) Start a Kafka consumer (e.g., one written in IDEA, or the console consumer sketched after these steps)

3) From the Flume root directory, start Flume
$ bin/flume-ng agent -c conf/ -n a1 -f jobs/flume-kafka.conf

4) Append data to /opt/module/datas/flume.log and watch what the Kafka consumer receives (note: >> appends, while > would truncate the file first)
$ echo hello >> /opt/module/datas/flume.log
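For step 2, if no IDEA consumer is at hand, the console consumer bundled with Kafka works just as well; a minimal sketch, assuming it is run from the Kafka installation directory:

bin/kafka-console-consumer.sh --bootstrap-server hadoop102:9092 --topic first --from-beginning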