flume将数据写入各个组件

一、flume集成hdfs,将数据写入到hdfs

          a1.sources = r1
          a1.sinks = k1
          a1.channels = c1
                
          a1.sources.r1.type =avro
          a1.sources.r1.bind=0.0.0.0
          a1.sources.r1.port=8888
          #存储在本地的hdfs
          a1.sinks.k1.type = hdfs
          a1.sinks.k1.hdfs.path = /flume/hdfs_sinkData/%y-%m-%d/%H%M/%S
          a1.sinks.k1.hdfs.filePrefix = events-
          a1.sinks.k1.hdfs.round = true
          a1.sinks.k1.hdfs.roundValue = 10
          a1.sinks.k1.hdfs.roundUnit = minute
          a1.sinks.k1.hdfs.useLocalTimeStamp = true
          #中间管道
          a1.channels.c1.type = file
          a1.channels.c1.checkpointDir = /yang/flume_source/checkpoint
          a1.channels.c1.dataDirs  = /yang/flume_source/data
 
          a1.sources.r1.channels = c1
          a1.sinks.k1.channel = c1
 
二、flume集成kafka,将数据写到kafka
  # Flume agent "a1": Avro source -> memory channel -> Kafka sink.
  a1.sources = s1
  a1.channels = c1
  a1.sinks = k1

  # Source s1: Avro source bound to all interfaces on port 8888, feeding c1.
  a1.sources.s1.type = avro
  a1.sources.s1.bind = 0.0.0.0
  a1.sources.s1.port = 8888
  a1.sources.s1.channels = c1

  # Channel c1: in-memory channel; no capacity settings are given here,
  # so Flume's defaults apply.
  a1.channels.c1.type = memory

  # Sink k1: publishes events from c1 to the Kafka topic "testtopic".
  # NOTE(review): topic / brokerList / requiredAcks / batchSize look like the
  # older (Flume 1.6-era) Kafka sink property names; newer releases document
  # kafka.topic / kafka.bootstrap.servers / kafka.producer.acks / flumeBatchSize.
  # Confirm against the Flume version actually deployed before renaming.
  a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
  a1.sinks.k1.channel = c1
  a1.sinks.k1.brokerList = 172.20.237.111:9092
  a1.sinks.k1.topic = testtopic
  a1.sinks.k1.requiredAcks = 1
  a1.sinks.k1.batchSize = 20
 
三、flume集成hive,将数据写入到hive
      # Name the components on this agent
      a1.sources = r1
      a1.sinks = k1
      a1.channels = c1
 
      # Describe/configure the source
      a1.sources.r1.type = netcat
      a1.sources.r1.bind = localhost
      a1.sources.r1.port = 44444
 
     # Describe the sink
    a1.sinks.k1.type = hive
    a1.sinks.k1.hive.metastore = thrift://master:9083
    a1.sinks.k1.hive.database = default
    a1.sinks.k1.hive.table = t_pages
 a1.sinks.k1.useLocalTimeStamp = false
 a1.sinks.k1.round = true
 a1.sinks.k1.roundValue = 10
 a1.sinks.k1.roundUnit = minute
 a1.sinks.k1.serializer = DELIMITED
 a1.sinks.k1.serializer.delimiter = "\t"
 a1.sinks.k1.serializer.serdeSeparator = '\t'
 a1.sinks.k1.serializer.fieldnames     =date,user_id,session_id,page_id,action_time,search_keyword,click_category_id,click_product_id,order_category_ids,order_product_ids,pay_category_ids,pay_product_ids,city_id
 
 # Use a channel which buffers events in memory
 a1.channels.c1.type = memory
 a1.channels.c1.capacity = 1000
 a1.channels.c1.transactionCapacity = 100
 
 # Bind the source and sink to the channel
 a1.sources.r1.channels = c1
 a1.sinks.k1.channel = c1
四、flume集成hbase,将数据写入到hbase
# HBase sink settings for sink k1 of agent a1.
# NOTE(review): this fragment shows sink properties only; the agent's component
# declarations and the sink's channel binding are not shown here.
#
# Comments are kept on their own lines: in a Java properties file a '#' that
# follows a value is part of the value, not a comment.
a1.sinks.k1.type = org.apache.flume.sink.hbase.AsyncHBaseSink
# HBase table name
a1.sinks.k1.table = Router
# HBase column family
a1.sinks.k1.columnFamily = log
# Serializer class that turns events into HBase writes
a1.sinks.k1.serializer = org.apache.flume.sink.hbase.BaimiAsyncHbaseEventSerializer
# HBase columns; the list must stay on a single line (a bare line break would
# end the value and start a new, bogus property).
a1.sinks.k1.serializer.payloadColumn = serviceTime,browerOS,clientTime,screenHeight,screenWidth,url,userAgent,mobileDevice,gwId,mac

posted @ 2019-02-21 11:31  xiaolaotou  阅读(391)  评论(0)  编辑  收藏  举报