spark机器学习
多层感知器(MLP)
# Multilayer perceptron (MLP) classification example for Spark ML.
#
# Loads a libsvm-formatted multiclass dataset, trains a
# MultilayerPerceptronClassifier on 60% of it, and prints the accuracy
# measured on the remaining 40%.
from __future__ import print_function

from pyspark.ml.classification import MultilayerPerceptronClassifier
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.sql import SparkSession


def main():
    """Train an MLP classifier on the sample data and print its test accuracy.

    Side effects: starts (or reuses) a SparkSession, reads
    ``data/mllib/sample_multiclass_classification_data.txt`` relative to the
    working directory, prints the first rows of the prediction DataFrame and
    the accuracy line to stdout, and stops the session before returning.
    """
    spark = (
        SparkSession.builder
        .appName("multilayer_perceptron_classification_example")
        .getOrCreate()
    )
    # Stop the session even when loading, training or evaluation fails, so a
    # failed run does not leave the Spark application running.
    try:
        # Load the data.
        data = spark.read.format("libsvm") \
            .load("data/mllib/sample_multiclass_classification_data.txt")

        # Split into training (60%) and test (40%) sets; the fixed seed keeps
        # the split reproducible between runs.
        train, test = data.randomSplit([0.6, 0.4], 1234)

        # Layer sizes: input, two hidden layers, output.
        layers = [4, 5, 4, 3]

        # Create the multilayer perceptron trainer.
        trainer = MultilayerPerceptronClassifier(
            maxIter=100, layers=layers, blockSize=128, seed=1234)

        # Train the model.
        model = trainer.fit(train)

        # Predict on the test set and compute the accuracy.
        result = model.transform(test)
        result.show()
        predictionAndLabels = result.select("prediction", "label")
        evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
        print("Test set accuracy = " + str(evaluator.evaluate(predictionAndLabels)))
    finally:
        spark.stop()


if __name__ == "__main__":
    main()
+-----+--------------------+----------+
|label|            features|prediction|
+-----+--------------------+----------+
|  0.0|(4,[0,1,2,3],[-0....|       2.0|
|  0.0|(4,[0,1,2,3],[-0....|       0.0|
|  0.0|(4,[0,1,2,3],[-0....|       0.0|
|  0.0|(4,[0,1,2,3],[-0....|       2.0|
|  0.0|(4,[0,1,2,3],[-0....|       2.0|
|  0.0|(4,[0,1,2,3],[-1....|       2.0|
|  0.0|(4,[0,1,2,3],[0.1...|       0.0|
|  0.0|(4,[0,1,2,3],[0.2...|       0.0|
|  0.0|(4,[0,1,2,3],[0.3...|       0.0|
|  0.0|(4,[0,1,2,3],[0.3...|       0.0|
|  0.0|(4,[0,1,2,3],[0.3...|       0.0|
|  0.0|(4,[0,1,2,3],[0.4...|       0.0|
|  0.0|(4,[0,1,2,3],[0.5...|       0.0|
|  0.0|(4,[0,1,2,3],[0.7...|       0.0|
|  0.0|(4,[0,1,2,3],[0.8...|       0.0|
|  0.0|(4,[0,1,2,3],[1.0...|       0.0|
|  0.0|(4,[0,2,3],[0.166...|       0.0|
|  0.0|(4,[0,2,3],[0.388...|       0.0|
|  1.0|(4,[0,1,2,3],[-0....|       1.0|
|  1.0|(4,[0,1,2,3],[-0....|       1.0|
+-----+--------------------+----------+
only showing top 20 rows

Test set accuracy = 0.901960784314