Kafka 消费者实时消费数据并存入 HDFS 的 Java/Scala 代码
hadoop-client 依赖比较混乱:调试了很多次,CDH 版本缺少很多 jar;改用 hadoop 2.7.3 即可正常运行。
自定义输出流的池子进行流管理 public void writeLog2HDFS(String path, byte[] log) { try { //得到我们的装饰流 FSDataOutputStream out = HDFSOutputStreamPool.getInstance().takeOutputStream(path); out.write(log); out.write("\r\n".getBytes()); out.hsync(); out.close(); } catch (Exception e) { e.printStackTrace(); } } }
/** * @created by imp ON 2019/3/1 */ object KafkaScalaConsumer { val write=new HDFSWriter() def ZK_CONN = "192.168.121.12:2181" def GROUP_ID = "1test-consumer-group109" def TOPIC = "eshop" def main(args: Array[String]): Unit = { //println(" 开始了 ") val connector = Consumer.create(createConfig()) val topicCountMap = new HashMap[String, Int]() topicCountMap.put(TOPIC, 3) // TOPIC在创建时就指定了它有3个partition val msgStreams: Map[String, List[KafkaStream[Array[Byte], Array[Byte]]]] = connector.createMessageStreams(topicCountMap) println("# of streams is " + msgStreams.get(TOPIC).get.size) val threadPool:ExecutorService=Executors.newFixedThreadPool(3) var index = 0; for (stream <- msgStreams.get(TOPIC).get) { threadPool.execute(new ThreadDemo("consumer_"+index,stream)) index+=1; } } class ThreadDemo(threadName:String,stream:KafkaStream[Array[Byte], Array[Byte]]) extends Runnable{ override def run(): Unit = { val it: ConsumerIterator[Array[Byte], Array[Byte]] = stream.iterator(); while(it.hasNext()){ val data : MessageAndMetadata[Array[Byte], Array[Byte]] = it.next() val msg=data.message() val log = new String(msg) val arr = StringUtil.splitLog(log) if (arr == null || arr.length < 1) return //todo: continue is not supported //主机名 val hostname = StringUtil.getHostname(arr) //日期串 val dateStr = StringUtil.formatYyyyMmDdHhMi(arr) //path val rawPath = "/spark/eshop/" + dateStr + "/" + hostname + ".log" //写入数据到hdfs System.out.println(log) write .writeLog2HDFS(rawPath, msg) } } } def createConfig(): ConsumerConfig = { val props = new Properties() props.put("zookeeper.connect", ZK_CONN) // props.put("bootstrap.servers","localhost:9092") props.put("group.id", GROUP_ID) props.put("zookeeper.session.timeout.ms", "5000") props.put("zookeeper.connection.timeout.ms","10000") props.put("auto.offset.reset", "smallest") props.put("auto.commit.interval.ms", "300") props.put("rebalance.backoff.ms","2000") props.put("rebalance.max.retries","10") props.put("auto.offset.reset", "smallest") new 
ConsumerConfig(props) } }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律