Spark 异常值过滤（IQR）
/**
 * Computes the interquartile-range (IQR) outlier fences for one column.
 *
 * The column is cast to double, its first and third quartiles are estimated
 * with `approxQuantile`, and the fences are derived as
 * `Q1 - 1.5 * IQR` and `Q3 + 1.5 * IQR`, where `IQR = Q3 - Q1`.
 *
 * The quartiles are approximate: they are requested with a relative error
 * of 0.1, so the resulting fences are estimates rather than exact values.
 *
 * @param df      the DataFrame containing the column to analyse
 * @param colName name of the column whose outlier fences are computed
 * @return a two-element array: `Array(lowerRange, upperRange)`
 * @throws IllegalArgumentException if no quartiles could be computed for the
 *                                  column (e.g. it holds no usable values)
 */
def getIQR(df: DataFrame, colName: String): Array[Double] = {
  // Cast up front so the quantile computation always sees a double column.
  val numericDf = df.withColumn(colName, col(colName).cast(DoubleType))

  // Only Q1 and Q3 are needed; Q3 is the 0.75 quantile (not 0.7).
  val quartiles = numericDf.stat.approxQuantile(colName, Array(0.25, 0.75), 0.1)

  // approxQuantile yields an empty array when there is nothing to summarise;
  // fail with a clear message instead of an index-out-of-bounds error.
  require(
    quartiles.length == 2,
    s"Cannot compute IQR for column '$colName': no quantiles were returned"
  )

  val Array(q1, q3) = quartiles
  val iqr = q3 - q1
  val lowerRange = q1 - 1.5 * iqr
  val upperRange = q3 + 1.5 * iqr
  Array(lowerRange, upperRange)
}