# Spark Streaming 用 Redis 管理偏移量
### RedisUtils.scala
```scala
import java.io.FileInputStream
import java.util.Properties
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
/**
 * Lazily-initialized Jedis connection pool configured from a
 * `jedis.properties` file on the classpath.
 *
 * Expected properties: redis.host, redis.auth, redis.port,
 * redis.maxConn, redis.maxIdle.
 */
object RedisUtils {
  private val properties = new Properties()
  // NOTE(review): getResource returns null if jedis.properties is missing,
  // which surfaces here as an NPE at first use of this object.
  val path: String = Thread.currentThread().getContextClassLoader.getResource("jedis.properties").getPath
  // Close the stream after loading — the original leaked the FileInputStream.
  locally {
    val in = new FileInputStream(path)
    try properties.load(in) finally in.close()
  }
  val host: String = properties.getProperty("redis.host")
  val auth: String = properties.getProperty("redis.auth")
  val port: Int = properties.getProperty("redis.port").toInt
  val config = new JedisPoolConfig
  config.setMaxTotal(properties.getProperty("redis.maxConn").toInt)
  config.setMaxIdle(properties.getProperty("redis.maxIdle").toInt)
  // Pass the password: the original read redis.auth but never applied it,
  // so authenticated Redis servers would reject every connection.
  // Jedis sends no AUTH command when the password is null, so behavior is
  // unchanged for unauthenticated setups.
  val pool: JedisPool = new JedisPool(config, host, port, 10000, auth)

  /** Borrows a connection from the pool; caller must close() it to return it. */
  def getConnections(): Jedis = {
    pool.getResource
  }
}
```
### OffsetKafkaRedis.scala
```scala
import java.util
import org.apache.kafka.clients.consumer.ConsumerRecord
import scala.collection.mutable
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
import redis.clients.jedis.{Jedis, Pipeline}
import org.apache.log4j.Logger
import scala.util.Try
import scala.collection.JavaConverters._
/**
 * Spark Streaming word-count job that manages Kafka consumer offsets in
 * Redis instead of relying on Kafka's auto-commit.
 *
 * Offset keys follow the pattern `kafka_offset:<groupId>:<topic>:<partition>`
 * and store the next offset to read (`untilOffset`).
 *
 * NOTE(review): offsets are written to Redis only after the batch's output
 * action runs, so the job is at-least-once — a failure between the output
 * and the offset commit replays that batch.
 */
object OffsetKafaRedis {
  private val logger: Logger = Logger.getLogger(this.getClass)

  /**
   * Reads previously committed offsets from Redis for the given topics and
   * consumer group.
   *
   * @param topics  topics whose offsets should be restored
   * @param groupId consumer group id used in the Redis key namespace
   * @return partition -> offset map; empty on a first run (no keys yet),
   *         which makes the direct stream fall back to `auto.offset.reset`
   */
  def getOffset(topics: Array[String], groupId: String): mutable.Map[TopicPartition, Long] = {
    val fromOffset = scala.collection.mutable.Map[TopicPartition, Long]()
    val jedis: Jedis = RedisUtils.getConnections()
    // Close the connection even if a Redis call throws — the original
    // leaked the pooled connection on any exception before jedis.close().
    try {
      topics.foreach(topic => {
        val keys: util.Set[String] = jedis.keys(s"kafka_offset:${groupId}:${topic}:*")
        if (!keys.isEmpty) {
          keys.asScala.foreach(key => {
            val offset: String = jedis.get(key)
            // Partition id is the suffix after the fixed key prefix;
            // fall back to partition 0 if the key is malformed.
            val partition: String = Try(key.split(s"kafka_offset:${groupId}:${topic}:").apply(1)).getOrElse("0")
            println(s"[INFO] 当前主题:${topic}, 当前分区:${partition}, 当前偏移量:${offset}")
            fromOffset.put(new TopicPartition(topic, partition.toInt), offset.toLong)
          })
        }
      })
    } finally {
      jedis.close()
    }
    fromOffset
  }

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("redisOffsetDemo").setMaster("local[2]")
    val context = new SparkContext(conf)
    context.setLogLevel("WARN")
    val ssc: StreamingContext = new StreamingContext(context, Seconds(10))
    val topics = Array("offsetDemo")
    val groupId = "g1"
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "linux01:9092,linux02:9092,linux03:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest",
      // Offsets are committed manually to Redis; Kafka must not auto-commit.
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    // Restore offsets from Redis so the stream resumes where it left off.
    val offsets: mutable.Map[TopicPartition, Long] = getOffset(topics, groupId)
    val kafkaDStream: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, offsets)
    )
    kafkaDStream.foreachRDD(
      rdd => {
        // Offset ranges must be captured on the driver from the source RDD
        // before any transformation drops the HasOffsetRanges mixin.
        val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
        val jedis: Jedis = RedisUtils.getConnections()
        val pipeline: Pipeline = jedis.pipelined()
        // MULTI/EXEC so the per-partition offset writes commit atomically.
        pipeline.multi()
        try {
          val result: RDD[(String, Int)] = rdd.map(_.value()).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
          result.foreach(println)
          offsetRanges.foreach(
            iter => {
              val key: String = s"kafka_offset:${groupId}:${iter.topic}:${iter.partition}"
              // untilOffset is exclusive: the next offset to consume.
              val value: Long = iter.untilOffset
              // Fixed log-tag typo: was "[INOF]".
              println(s"[INFO]所属键:${key}更新偏移量:${value}")
              pipeline.set(key, value.toString)
            }
          )
          pipeline.exec()
          pipeline.sync()
        } catch {
          case e: Exception => {
            logger.error("[ERROR]", e)
            // Roll back the queued MULTI so no partial offsets are stored.
            pipeline.discard()
          }
        } finally {
          pipeline.close()
          jedis.close()
        }
      }
    )
    ssc.start()
    ssc.awaitTermination()
  }
}
```
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· 单线程的Redis速度为什么快?
· 展开说说关于C#中ORM框架的用法!
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库
2020-08-09 猴子补丁
2020-08-09 设计模式创造者模式--python