Kafka 消费者实时消费数据并存入 HDFS 的 Java/Scala 代码
hadoop-client 依赖比较混乱:调试了很多次,CDH 版本缺少很多 jar;改用 hadoop 2.7.3 即可正常运行。
自定义输出流的池子进行流管理 public void writeLog2HDFS(String path, byte[] log) { try { //得到我们的装饰流 FSDataOutputStream out = HDFSOutputStreamPool.getInstance().takeOutputStream(path); out.write(log); out.write("\r\n".getBytes()); out.hsync(); out.close(); } catch (Exception e) { e.printStackTrace(); } } }
/** * @created by imp ON 2019/3/1 */ object KafkaScalaConsumer { val write=new HDFSWriter() def ZK_CONN = "192.168.121.12:2181" def GROUP_ID = "1test-consumer-group109" def TOPIC = "eshop" def main(args: Array[String]): Unit = { //println(" 开始了 ") val connector = Consumer.create(createConfig()) val topicCountMap = new HashMap[String, Int]() topicCountMap.put(TOPIC, 3) // TOPIC在创建时就指定了它有3个partition val msgStreams: Map[String, List[KafkaStream[Array[Byte], Array[Byte]]]] = connector.createMessageStreams(topicCountMap) println("# of streams is " + msgStreams.get(TOPIC).get.size) val threadPool:ExecutorService=Executors.newFixedThreadPool(3) var index = 0; for (stream <- msgStreams.get(TOPIC).get) { threadPool.execute(new ThreadDemo("consumer_"+index,stream)) index+=1; } } class ThreadDemo(threadName:String,stream:KafkaStream[Array[Byte], Array[Byte]]) extends Runnable{ override def run(): Unit = { val it: ConsumerIterator[Array[Byte], Array[Byte]] = stream.iterator(); while(it.hasNext()){ val data : MessageAndMetadata[Array[Byte], Array[Byte]] = it.next() val msg=data.message() val log = new String(msg) val arr = StringUtil.splitLog(log) if (arr == null || arr.length < 1) return //todo: continue is not supported //主机名 val hostname = StringUtil.getHostname(arr) //日期串 val dateStr = StringUtil.formatYyyyMmDdHhMi(arr) //path val rawPath = "/spark/eshop/" + dateStr + "/" + hostname + ".log" //写入数据到hdfs System.out.println(log) write .writeLog2HDFS(rawPath, msg) } } } def createConfig(): ConsumerConfig = { val props = new Properties() props.put("zookeeper.connect", ZK_CONN) // props.put("bootstrap.servers","localhost:9092") props.put("group.id", GROUP_ID) props.put("zookeeper.session.timeout.ms", "5000") props.put("zookeeper.connection.timeout.ms","10000") props.put("auto.offset.reset", "smallest") props.put("auto.commit.interval.ms", "300") props.put("rebalance.backoff.ms","2000") props.put("rebalance.max.retries","10") props.put("auto.offset.reset", "smallest") new 
ConsumerConfig(props) } }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律