学习进度笔记

学习进度笔记28

回归算法

import org.apache.log4j.{Level, Logger}

import org.apache.spark.{SparkContext, SparkConf}

import org.apache.spark.mllib.regression.LinearRegressionWithSGD

import org.apache.spark.mllib.regression.LabeledPoint

import org.apache.spark.mllib.linalg.Vectors

 

/**
 * Trains a linear regression model with stochastic gradient descent (Spark MLlib)
 * and prints the mean squared error measured on the training data itself.
 *
 * Every input line must have the form `label,f1 f2 f3 ...`: the label and the
 * feature list are separated by a comma, the individual features by single spaces.
 *
 * Usage: `LinearRegression [dataPath]` -- when no path is given, the default
 * location of `lpsa.data` is used.
 */
object LinearRegression {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultDataPath = "/home/hadoop/upload/class8/lpsa.data"

  /** Number of SGD iterations passed to the trainer. */
  private val NumIterations = 100

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` overrides the input data path
   */
  def main(args: Array[String]): Unit = {
    // Keep the terminal readable by silencing routine Spark / Jetty log output.
    Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val dataPath = args.headOption.getOrElse(DefaultDataPath)

    // Set up the runtime environment: local mode with 4 worker threads.
    val conf = new SparkConf().setAppName("LinearRegression").setMaster("local[4]")
    val sc = new SparkContext(conf)

    // try/finally so the context is released even if parsing or training throws.
    try {
      // Load and parse the data. The parsed RDD is cached because it is read by
      // training and then again by the evaluation pass below; without caching
      // the text file would be re-read and re-parsed each time.
      val parsedData = sc.textFile(dataPath).map { line =>
        val parts = line.split(',')
        LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
      }.cache()

      // Build the model.
      val model = LinearRegressionWithSGD.train(parsedData, NumIterations)

      // Evaluate the model on the training examples. The sum of squared errors
      // and the example count are accumulated in a single pass, so predictions
      // are computed only once.
      val (squaredErrorSum, exampleCount) = parsedData
        .map { point =>
          val error = point.label - model.predict(point.features)
          error * error
        }
        .aggregate((0.0, 0L))(
          (acc, squaredError) => (acc._1 + squaredError, acc._2 + 1L),
          (left, right) => (left._1 + right._1, left._2 + right._2)
        )

      val MSE = squaredErrorSum / exampleCount
      println("training Mean Squared Error = " + MSE)
    } finally {
      sc.stop()
    }
  }
}

 

posted @ 2021-02-07 07:40  城南漠北  阅读(59)  评论(0)  编辑  收藏  举报