每日学习

dir-hdfs.conf:
#spooldir：flume中自带的读取目录的source，只要出现新文件就会被读走
#定义三大组件的名称
ag1.sources = source1
ag1.sinks = sink1
ag1.channels = channel1
 
# 配置source组件
ag1.sources.source1.type = spooldir    #官网flume.apache.org
ag1.sources.source1.spoolDir = /root/log/ #具体的目录
ag1.sources.source1.fileSuffix=.FINISHED   #文件后缀，文件内容被读走了就改成这样了
 
 
# 配置sink组件
ag1.sinks.sink1.type = hdfs
ag1.sinks.sink1.hdfs.path =hdfs://node1:8020/access_log/%y-%m-%d/%H-%M
ag1.sinks.sink1.hdfs.filePrefix = app_log
ag1.sinks.sink1.hdfs.fileSuffix = .log
ag1.sinks.sink1.hdfs.batchSize= 100
ag1.sinks.sink1.hdfs.fileType = DataStream
ag1.sinks.sink1.hdfs.writeFormat =Text
 
## roll：滚动切换：控制写文件的切换规则
ag1.sinks.sink1.hdfs.rollSize = 512000    ## 按文件体积（字节）来切   500k
#需要讲的：hdfs.rollInterval  hdfs.rollCount   hdfs.writeFormat  hdfs.fileType
ag1.sinks.sink1.hdfs.rollCount = 1000000  ## 按event条数切
ag1.sinks.sink1.hdfs.rollInterval = 60    ## 按时间间隔切换文件     三个，哪个满足就用哪个
 
## 控制生成目录的规则    目录多久切一次
ag1.sinks.sink1.hdfs.round = true
ag1.sinks.sink1.hdfs.roundValue = 10    #多久切一次  10分钟
ag1.sinks.sink1.hdfs.roundUnit = minute   #单位
 
ag1.sinks.sink1.hdfs.useLocalTimeStamp = true
 
# channel组件配置
ag1.channels.channel1.type = memory
ag1.channels.channel1.capacity = 500000   ## event条数  在通道中暂存的最大数量  数量要大于sink的batchSize的100条
ag1.channels.channel1.transactionCapacity = 600  ##flume事务控制所需要的缓存容量600条event   多少条记录归拢到一个事务中
 
# 绑定source、channel和sink之间的连接
ag1.sources.source1.channels = channel1
ag1.sinks.sink1.channel = channel1
posted @ 2021-10-19 22:15 睡觉不困阅读(37) 评论(0) 编辑收藏举报
努力加载评论中...
刷新页面返回顶部
睡觉不困

每日学习

公告