Monitoring files - periodic file monitoring
| import org.apache.spark.streaming.{Seconds, StreamingContext} |
| import org.apache.spark.SparkConf |
| |
| val sparkConf = new SparkConf().setAppName("fileStream").setMaster("local[3]") |
| |
| // One micro-batch every 10 seconds |
| val ssc = new StreamingContext(sparkConf, Seconds(10)) |
| |
| // Watch the directory; only files created there after the stream starts are read |
| val lines = ssc.textFileStream("D:\\sparkStreamLog") |
| |
| lines.print() |
| |
| ssc.start() |
| ssc.awaitTermination() |
| ------------------------------------------- |
| Time: 1565595490000 ms |
| ------------------------------------------- |
| (contents of the monitored files are printed here...) |
| ------------------------------------------- |
| Time: 1565595500000 ms |
| ------------------------------------------- |
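Note that `textFileStream` only picks up files that appear in the watched directory after the stream starts; appending to an existing file does not trigger a new batch. A minimal sketch for generating test input (file names here are illustrative): write the file outside the watched directory, then move it in atomically so the stream never sees a half-written file.

| import java.nio.file.{Files, Paths, StandardCopyOption} |
| |
| // Write to a temporary file outside the watched directory, then rename it |
| // into D:\sparkStreamLog so the stream picks up a fully written file |
| val tmp = Paths.get("D:\\data.tmp") |
| Files.write(tmp, "hello spark streaming".getBytes("UTF-8")) |
| Files.move(tmp, Paths.get("D:\\sparkStreamLog\\data-" + System.currentTimeMillis + ".txt"), |
|   StandardCopyOption.ATOMIC_MOVE) |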
Monitoring files - structured data stream
Data
| {"name":"json","age":23,"hobby":"running"} |
| {"name":"charles","age":32,"hobby":"basketball"} |
| {"name":"tom","age":28,"hobby":"football"} |
| {"name":"lili","age":24,"hobby":"running"} |
| {"name":"bob","age":20,"hobby":"swimming"} |
| import org.apache.spark.sql.SparkSession |
| import org.apache.spark.sql.types.StructType |
| |
| val spark = SparkSession.builder() |
|   .appName("dStream_1") |
|   .master("local[*]") |
|   .getOrCreate() |
| |
| import spark.implicits._ |
| |
| // Schema of the JSON records shown above |
| val userSchema = new StructType() |
|   .add("name", "string") |
|   .add("age", "integer") |
|   .add("hobby", "string") |
| |
| // Watch the directory for new JSON files |
| val userDF = spark.readStream |
|   .schema(userSchema) |
|   .json("D:/JsonFile") |
| |
| // Keep only users younger than 25, then count them by hobby |
| val userUnder25DF = userDF.filter($"age" < 25) |
| |
| val hobbyDF = userUnder25DF.groupBy("hobby").count() |
| |
| // complete mode reprints the whole aggregation result each batch |
| val query = hobbyDF.writeStream |
|   .outputMode("complete") |
|   .format("console") |
|   .start() |
| |
| query.awaitTermination() |
| |
| ------------------------------------------- |
| Batch: 1 |
| ------------------------------------------- |
| +--------+-----+ |
| | hobby|count| |
| +--------+-----+ |
| | running| 2| |
| |swimming| 1| |
| +--------+-----+ |
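In `complete` mode the console sink reprints the entire aggregation table every batch. For this query, `update` mode is a drop-in alternative that emits only the rows whose counts changed in the trigger (a minimal variation, not from the original post):

| val query = hobbyDF.writeStream |
|   .outputMode("update")   // emit only the rows updated in this trigger |
|   .format("console") |
|   .start() |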
Monitoring a socket port
Randomly reading data from a file and sending it
table.txt data
| scala java |
| java C++ |
| C C++ PHP |
| C++ PHP |
| python C++ |
| PHP java |
| |
Data-sending server program
| import java.io.PrintWriter |
| import java.net.ServerSocket |
| import scala.io.Source |
| |
| object DataSendServer { |
|   def main(args: Array[String]): Unit = { |
|     val file = "D:\\sparkStreamLog\\change\\table.txt" |
|     // Read all lines of the source file into memory |
|     val lines = Source.fromFile(file).getLines().toList |
|     val rowCount = lines.length |
|     // Listen on port 6666 |
|     val listen = new ServerSocket(6666) |
|     while (true) { |
|       // Serve each client connection on its own thread |
|       val socket = listen.accept() |
|       val thread = new Thread() { |
|         override def run(): Unit = { |
|           println("Client address: " + socket.getInetAddress) |
|           val send = new PrintWriter(socket.getOutputStream, true) |
|           while (true) { |
|             // Send one randomly chosen line every 3 seconds |
|             Thread.sleep(3000) |
|             val content = lines(index(rowCount)) |
|             println("******") |
|             println(content) |
|             // write does not auto-flush, so flush explicitly |
|             send.write(content + "\n") |
|             send.flush() |
|           } |
|           socket.close() // unreachable while the loop above runs forever |
|         } |
|       } |
|       thread.start() |
|     } |
|   } |
| |
|   // Return a random row index in [0, length) |
|   def index(length: Int): Int = { |
|     val rd = new java.util.Random() |
|     rd.nextInt(length) |
|   } |
| } |
| |
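The Spark Streaming receiver below connects to this server and word-counts each 6-second batch. For a quick manual test you can stand in for the sender with netcat (`nc -lk 6666`) and type lines by hand.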
| import org.apache.spark.SparkConf |
| import org.apache.spark.streaming.{Seconds, StreamingContext} |
| |
| // Receive text from the socket server and word-count each 6-second batch |
| val conf = new SparkConf().setAppName("SocketStream").setMaster("local[2]") |
| val ss = new StreamingContext(conf, Seconds(6)) |
| |
| val lines = ss.socketTextStream("localhost", 6666) |
| |
| lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print() |
| |
| ss.start() |
| ss.awaitTermination() |
| ------------------------------------------- |
| Time: 1565598120000 ms |
| ------------------------------------------- |
| (PHP,1) |
| (java,2) |
| (C++,1) |
| ------------------------------------------- |
| Time: 1565598126000 ms |
| ------------------------------------------- |
| (scala,2) |
| (java,2) |
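To count over a sliding window rather than a single batch, DStreams provide `reduceByKeyAndWindow`; a minimal sketch reusing `lines` from the code above (window and slide must be multiples of the 6-second batch interval):

| // Counts over the last 30 seconds, recomputed every 6 seconds |
| val windowedCounts = lines.flatMap(_.split(" ")) |
|   .map(x => (x, 1)) |
|   .reduceByKeyAndWindow((a: Int, b: Int) => a + b, Seconds(30), Seconds(6)) |
| |
| windowedCounts.print() |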
Monitoring port data - counts including historical data
| import java.sql.{Connection, DriverManager, PreparedStatement} |
| import org.apache.spark.SparkConf |
| import org.apache.spark.streaming.{Seconds, StreamingContext} |
| |
| val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCountStateful") |
| val ssc = new StreamingContext(conf, Seconds(5)) |
| |
| // updateStateByKey requires a checkpoint directory |
| ssc.checkpoint("D:\\sparkStreamLog\\change") |
| |
| val lines = ssc.socketTextStream("localhost", 6666) |
| |
| // Merge this batch's counts (values) with the accumulated state |
| val updateFunc = (values: Seq[Int], state: Option[Int]) => { |
|   val currentCount = values.foldLeft(0)(_ + _) |
|   val previousCount = state.getOrElse(0) |
|   // Prints (new-batch counts, previous state, combined total) |
|   println("#################", values.toBuffer, state.toBuffer, "\t result: " + (currentCount + previousCount)) |
|   Some(currentCount + previousCount) |
| } |
| |
| val wordDstream = lines.flatMap(_.split(" ")) |
|   .map(x => (x, 1)) |
|   .updateStateByKey[Int](updateFunc) |
| |
| // Write each batch's cumulative counts to MySQL; assumes a table like |
| // fromsparkdata(word VARCHAR, counts INT) already exists in testdb |
| wordDstream.foreachRDD(rdd => { |
|   def funChange(words: Iterator[(String, Int)]): Unit = { |
|     var conn: Connection = null |
|     var stat: PreparedStatement = null |
|     try { |
|       val url = "jdbc:mysql://localhost:3306/testdb?serverTimezone=UTC" |
|       val user = "root" |
|       val password = "123456" |
|       conn = DriverManager.getConnection(url, user, password) |
|       // Prepare the statement once and reuse it for every record |
|       val sql = "insert into fromsparkdata(word,counts) values (?,?)" |
|       stat = conn.prepareStatement(sql) |
|       words.foreach(word => { |
|         stat.setString(1, word._1.trim) |
|         stat.setInt(2, word._2) |
|         stat.executeUpdate() |
|       }) |
|     } catch { |
|       case e: Exception => e.printStackTrace() |
|     } finally { |
|       if (stat != null) { |
|         stat.close() |
|       } |
|       if (conn != null) { |
|         conn.close() |
|       } |
|     } |
|   } |
| |
|   // Rebalance into 3 partitions; each partition opens one DB connection |
|   val reparRdd = rdd.repartition(3) |
|   reparRdd.foreachPartition(funChange) |
| }) |
| |
| ssc.start() |
| ssc.awaitTermination() |
| (debug output from updateFunc for each batch: counts from the new data, counts from the historical state, and the returned total; the sample lines were truncated in the original) |
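`updateStateByKey` recomputes state for every key on every batch. Since Spark 1.6, `mapWithState` only touches keys present in the current batch and usually scales better; a minimal sketch of the same cumulative count (reusing `lines` and the checkpoint directory set above):

| import org.apache.spark.streaming.{State, StateSpec} |
| |
| // Add this batch's count for the word to the stored running total |
| val mappingFunc = (word: String, one: Option[Int], state: State[Int]) => { |
|   val sum = one.getOrElse(0) + state.getOption.getOrElse(0) |
|   state.update(sum) |
|   (word, sum) |
| } |
| |
| val stateDstream = lines.flatMap(_.split(" ")) |
|   .map(x => (x, 1)) |
|   .mapWithState(StateSpec.function(mappingFunc)) |
| |
| stateDstream.print() |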
Monitoring structured data on a port
| import org.apache.spark.sql.SparkSession |
| |
| val spark = SparkSession.builder() |
|   .appName("dStream_1") |
|   .master("local[*]") |
|   .getOrCreate() |
| |
| import spark.implicits._ |
| |
| // Socket source: each line arriving on the port becomes a row named "value" |
| val lines = spark.readStream |
|   .format("socket") |
|   .option("host", "localhost") |
|   .option("port", 6666) |
|   .load() |
| |
| // Split each line into words |
| val words = lines.as[String] |
|   .flatMap(_.split(" ")) |
| |
| // Running word count over the whole stream |
| val wordCounts = words.groupBy("value").count() |
| |
| val query = wordCounts.writeStream |
|   .outputMode("complete") |
|   .format("console") |
|   .start() |
| |
| query.awaitTermination() |
| ------------------------------------------- |
| Batch: 0 |
| ------------------------------------------- |
| +-----+-----+ |
| |value|count| |
| +-----+-----+ |
| | C++| 1| |
| | C| 1| |
| | PHP| 1| |
| +-----+-----+ |
| |
| ------------------------------------------- |
| Batch: 1 |
| ------------------------------------------- |
| +------+-----+ |
| | value|count| |
| +------+-----+ |
| | C++| 5| |
| | C| 2| |
| | scala| 1| |
| | PHP| 5| |
| | java| 2| |
| |python| 1| |
| +------+-----+ |
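The socket source can also attach an arrival timestamp to each line via the `includeTimestamp` option, which makes windowed aggregation possible. A minimal sketch adapted from Spark's windowed word-count example (window and slide durations are illustrative):

| import org.apache.spark.sql.functions.window |
| |
| val linesTs = spark.readStream |
|   .format("socket") |
|   .option("host", "localhost") |
|   .option("port", 6666) |
|   .option("includeTimestamp", true) |
|   .load() |
| |
| // Pair every word with its line's arrival time, then count per sliding window |
| val windowedCounts = linesTs.as[(String, java.sql.Timestamp)] |
|   .flatMap { case (line, ts) => line.split(" ").map(word => (word, ts)) } |
|   .toDF("word", "timestamp") |
|   .groupBy(window($"timestamp", "30 seconds", "10 seconds"), $"word") |
|   .count() |
| |
| val windowedQuery = windowedCounts.writeStream |
|   .outputMode("complete") |
|   .format("console") |
|   .option("truncate", false) |
|   .start() |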
Monitoring an RDD queue
| import org.apache.spark.SparkConf |
| import org.apache.spark.rdd.RDD |
| import org.apache.spark.streaming.{Seconds, StreamingContext} |
| |
| val sparkConf = new SparkConf().setAppName("RDDQueue").setMaster("local[2]") |
| val ssc = new StreamingContext(sparkConf, Seconds(2)) |
| |
| // SynchronizedQueue is deprecated in newer Scala versions; a |
| // mutable.Queue guarded by synchronized blocks works as well |
| val rddQueue = new scala.collection.mutable.SynchronizedQueue[RDD[Int]]() |
| |
| val queueStream = ssc.queueStream(rddQueue) |
| |
| // Count the numbers 1..100 by their last digit |
| queueStream.map(r => (r % 10, 1)).reduceByKey(_ + _).print() |
| |
| ssc.start() |
| |
| // Push five RDDs into the queue, one every 3 seconds |
| for (_ <- 1 to 5) { |
|   rddQueue += ssc.sparkContext.makeRDD(1 to 100, 2) |
|   Thread.sleep(3000) |
| } |
| |
| // Give the last batches time to finish before stopping |
| Thread.sleep(30000) |
| ssc.stop() |
| ------------------------------------------- |
| Time: 1565598778000 ms |
| ------------------------------------------- |
| (4,10) |
| (0,10) |
| (6,10) |
| (8,10) |
| (2,10) |
| (1,10) |
| (3,10) |
| (7,10) |
| (9,10) |
| (5,10) |
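By default `queueStream` consumes one RDD from the queue per batch interval. Passing `oneAtATime = false` makes each batch drain everything queued since the previous one (a small variation on the code above):

| // Process all queued RDDs in a single batch instead of one per interval |
| val queueStreamAll = ssc.queueStream(rddQueue, oneAtATime = false) |
| queueStreamAll.map(r => (r % 10, 1)).reduceByKey(_ + _).print() |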