Spark算上下基线
/**
 * Computes and prints the upper/lower traffic baseline for every `ip_port` and
 * time-of-day window found in `/test/baseLineTestData.txt`.
 *
 * Each input line is expected to have the form
 * `ip|port|<date> <HH:mm>|<date> <HH:mm>|value`. Samples are grouped first by
 * `ip_port` and then by `startHH:mm_endHH:mm`; the date part is dropped so that the
 * same window on different days falls into the same group. For every group the
 * baseline is `mean ± 1.960 * sqrt(variance)`, with mean and variance taken from
 * `Statistics.colStats`.
 *
 * The whole file is collected to the driver, so it must fit in driver memory.
 * Lines that cannot be parsed are reported and skipped.
 *
 * @param sc Spark context used to read the input file and to build the per-group RDDs
 */
def baseLine(sc: SparkContext): Unit = {
  import scala.collection.mutable
  import scala.util.Try

  // One parsed input record; `window` is the "startHH:mm_endHH:mm" grouping key.
  final case class Sample(
      ip: String,
      port: String,
      startTime: String,
      endTime: String,
      value: Double,
      window: String)

  // Extracts the time-of-day part of a "<date> <time>" timestamp, if present.
  def clockOf(timestamp: String): Option[String] = timestamp.split(" ") match {
    case Array(_, clock, _*) => Some(clock)
    case _                   => None
  }

  // Parses one '|'-separated line; None when a field is missing or not numeric.
  def parse(line: String): Option[Sample] = line.split('|') match {
    case Array(ip, port, startTime, endTime, rawValue, _*) =>
      for {
        startClock <- clockOf(startTime)
        endClock   <- clockOf(endTime)
        value      <- Try(rawValue.toDouble).toOption
      } yield Sample(ip, port, startTime, endTime, value, startClock + "_" + endClock)
    case _ => None
  }

  // 1.960 is the z-score of a two-sided 95% interval of a normal distribution.
  val zScore = 1.960

  println("--------------------baseLine start--------------------")
  val lines = sc.textFile("/test/baseLineTestData.txt").collect()

  // ip_port -> (startHH:mm_endHH:mm -> observed values)
  val grouped =
    mutable.HashMap[String, mutable.HashMap[String, mutable.ArrayBuffer[Double]]]()

  lines.foreach { line =>
    println("--------------------data1.foreach start--------------------")
    parse(line) match {
      case None =>
        println("skip malformed line:" + line)

      case Some(sample) =>
        println("ip:" + sample.ip)
        println("port:" + sample.port)
        println("startTime:" + sample.startTime)
        println("endTime:" + sample.endTime)
        println("sun:" + sample.value)

        val key1 = sample.ip + "_" + sample.port
        println("key1:" + key1)
        val key2 = sample.window
        println("key2:" + key2)

        if (grouped.contains(key1)) {
          println("--------------------map is not null--------------------")
        } else {
          // Key not seen before: this is a brand-new ip_port.
          println("--------------------map is null--------------------")
        }

        // Create the missing levels on demand and always record the sample, so neither
        // the first value of a new ip_port nor a new window of a known one is lost.
        grouped
          .getOrElseUpdate(key1, mutable.HashMap.empty[String, mutable.ArrayBuffer[Double]])
          .getOrElseUpdate(key2, mutable.ArrayBuffer.empty[Double]) += sample.value
    }
  }
  println("--------------------get data is end--------------------")

  grouped.foreach { case (ipPort, windows) =>
    println("--------------------Statistics start --------------------")
    println("resultKey1:" + ipPort)

    windows.foreach { case (window, values) =>
      println("resultKey2:" + window)

      // colStats works on an RDD of vectors, so wrap every sample in a 1-element vector.
      val vectors = values.map(v => Vectors.dense(v))
      val summary: MultivariateStatisticalSummary =
        Statistics.colStats(sc.parallelize(vectors))

      val mean = summary.mean(0)
      val variance = summary.variance(0)
      println("--------------------variance:" + summary.variance + " --------------------")
      println("--------------------mean apply 0:" + mean + " --------------------")
      println("--------------------variance apply 0:" + variance + " --------------------")

      val spread = zScore * math.sqrt(variance)
      val upbase = mean + spread
      val downbase = mean - spread
      println("------------------- " + upbase + " ---------- " + downbase)

      println("ip port:" + ipPort + ",time:" + window + ",upbase:" + upbase + ",downbase:" + downbase)
    }
  }
  println("--------------------baseLine end --------------------")
}
需求:计算某一个 IP 的某个端口在每天同一时间段(例如 14:02–14:07)的流量上下基线,即 均值 ± 1.96 × 标准差。
数据样例(每条记录占一行,字段以 | 分隔,依次为:IP|端口|开始时间|结束时间|流量值):
192.168.10.110|8080|2015-10-14 14:02|2015-10-14 14:07|3210981 192.168.10.110|8080|2015-10-13 14:02|2015-10-13 14:07|3210881 192.168.10.110|8080|2015-10-12 14:02|2015-10-12 14:07|3210781 192.168.10.110|8080|2015-10-11 14:02|2015-10-11 14:07|3210681 192.168.10.110|8080|2015-10-10 14:02|2015-10-10 14:07|3210581 192.168.10.110|8080|2015-10-09 14:02|2015-10-09 14:07|3210481 192.168.10.110|8080|2015-10-08 14:02|2015-10-08 14:07|3210381 192.168.10.110|8080|2015-10-07 14:02|2015-10-07 14:07|3210281 192.168.10.110|8080|2015-10-06 14:02|2015-10-06 14:07|3210181 192.168.10.110|8080|2015-10-05 14:02|2015-10-05 14:07|3210081 192.168.10.110|8080|2015-10-04 14:02|2015-10-04 14:07|3219981 192.168.10.110|8080|2015-10-03 14:02|2015-10-03 14:07|3218981 192.168.10.110|8080|2015-10-02 14:02|2015-10-02 14:07|3217981 192.168.10.110|8080|2015-10-01 14:02|2015-10-01 14:07|3216981 192.168.10.110|8080|2015-09-30 14:02|2015-09-30 14:07|3215981 192.168.10.110|8080|2015-09-29 14:02|2015-09-29 14:07|3214981 192.168.10.110|8080|2015-09-28 14:02|2015-09-28 14:07|3213981 192.168.10.110|8080|2015-09-27 14:02|2015-09-27 14:07|3212981 192.168.10.110|8080|2015-09-26 14:02|2015-09-26 14:07|3211981 192.168.10.110|8080|2015-09-25 14:02|2015-09-25 14:07|3220981 192.168.10.110|8080|2015-09-24 14:02|2015-09-24 14:07|3230981 192.168.10.110|8080|2015-09-23 14:02|2015-09-23 14:07|3240981 192.168.10.110|8080|2015-09-22 14:02|2015-09-22 14:07|3250981 192.168.10.110|8080|2015-09-21 14:02|2015-09-21 14:07|3260981 192.168.10.110|8080|2015-09-20 14:02|2015-09-20 14:07|3270981 192.168.10.110|8080|2015-09-19 14:02|2015-09-19 14:07|3280981 192.168.10.110|8080|2015-09-18 14:02|2015-09-18 14:07|3290981 192.168.10.110|8080|2015-09-17 14:02|2015-09-17 14:07|3210982 192.168.10.110|8080|2015-09-16 14:02|2015-09-16 14:07|3210983 192.168.10.110|8080|2015-09-15 14:02|2015-09-15 14:07|3210984 192.168.10.110|8080|2015-09-14 14:02|2015-09-14 14:07|3210985 192.168.10.110|8080|2015-09-13 14:02|2015-09-13 14:07|3210986 
192.168.10.110|8080|2015-09-12 14:02|2015-09-12 14:07|3210987 192.168.10.110|8080|2015-09-11 14:02|2015-09-11 14:07|3210988 192.168.10.110|8080|2015-09-10 14:02|2015-09-10 14:07|3110989 192.168.10.110|8080|2015-09-09 14:02|2015-09-09 14:07|3210981 192.168.10.110|8080|2015-09-07 14:02|2015-09-07 14:07|3310981 192.168.10.110|8080|2015-09-06 14:02|2015-09-06 14:07|3410981 192.168.10.110|8080|2015-09-05 14:02|2015-09-05 14:07|2510981 192.168.10.110|8081|2015-10-14 14:02|2015-10-14 14:07|2210981 192.168.10.110|8081|2015-10-13 14:02|2015-10-13 14:07|2210881 192.168.10.110|8081|2015-10-12 14:02|2015-10-12 14:07|2210781 192.168.10.110|8081|2015-10-11 14:02|2015-10-11 14:07|2210681 192.168.10.110|8081|2015-10-10 14:02|2015-10-10 14:07|2210581 192.168.10.110|8081|2015-10-09 14:02|2015-10-09 14:07|2210481 192.168.10.110|8081|2015-10-08 14:02|2015-10-08 14:07|2210381 192.168.10.110|8081|2015-10-07 14:02|2015-10-07 14:07|2210281 192.168.10.110|8081|2015-10-06 14:02|2015-10-06 14:07|2210181 192.168.10.110|8081|2015-10-05 14:02|2015-10-05 14:07|2210081 192.168.10.110|8081|2015-10-04 14:02|2015-10-04 14:07|2219981 192.168.10.110|8081|2015-10-03 14:02|2015-10-03 14:07|2218981 192.168.10.110|8081|2015-10-02 14:02|2015-10-02 14:07|2217981 192.168.10.110|8081|2015-10-01 14:02|2015-10-01 14:07|2216981 192.168.10.110|8081|2015-09-30 14:02|2015-09-30 14:07|2215981 192.168.10.110|8081|2015-09-29 14:02|2015-09-29 14:07|2214981 192.168.10.110|8081|2015-09-28 14:02|2015-09-28 14:07|2213981 192.168.10.110|8081|2015-09-27 14:02|2015-09-27 14:07|2212981 192.168.10.110|8081|2015-09-26 14:02|2015-09-26 14:07|2211981 192.168.10.110|8081|2015-09-25 14:02|2015-09-25 14:07|2220981 192.168.10.110|8081|2015-09-24 14:02|2015-09-24 14:07|2230981 192.168.10.110|8081|2015-09-23 14:02|2015-09-23 14:07|2240981 192.168.10.110|8081|2015-09-22 14:02|2015-09-22 14:07|2250981 192.168.10.110|8081|2015-09-21 14:02|2015-09-21 14:07|2260981 192.168.10.110|8081|2015-09-20 14:02|2015-09-20 14:07|2270981 
192.168.10.110|8081|2015-09-19 14:02|2015-09-19 14:07|2280981 192.168.10.110|8081|2015-09-18 14:02|2015-09-18 14:07|2290981 192.168.10.110|8081|2015-09-17 14:02|2015-09-17 14:07|2210982 192.168.10.110|8081|2015-09-16 14:02|2015-09-16 14:07|2210983 192.168.10.110|8081|2015-09-15 14:02|2015-09-15 14:07|2210984 192.168.10.110|8081|2015-09-14 14:02|2015-09-14 14:07|2210985 192.168.10.110|8081|2015-09-13 14:02|2015-09-13 14:07|2210986 192.168.10.110|8081|2015-09-12 14:02|2015-09-12 14:07|2210987 192.168.10.110|8081|2015-09-11 14:02|2015-09-11 14:07|2210988 192.168.10.110|8081|2015-09-10 14:02|2015-09-10 14:07|2110989 192.168.10.110|8081|2015-09-09 14:02|2015-09-09 14:07|2210981 192.168.10.110|8081|2015-09-07 14:02|2015-09-07 14:07|2310981 192.168.10.110|8081|2015-09-06 14:02|2015-09-06 14:07|2410981 192.168.10.110|8081|2015-09-05 14:02|2015-09-05 14:07|2510981