Spark MLlib Collaborative Filtering 学习
1 package ML.collaborativeFilltering; 2 3 import org.apache.spark.SparkConf; 4 import org.apache.spark.api.java.JavaDoubleRDD; 5 import org.apache.spark.api.java.JavaPairRDD; 6 import org.apache.spark.api.java.JavaRDD; 7 import org.apache.spark.api.java.JavaSparkContext; 8 import org.apache.spark.api.java.function.Function; 9 import org.apache.spark.mllib.recommendation.ALS; 10 import org.apache.spark.mllib.recommendation.MatrixFactorizationModel; 11 import org.apache.spark.mllib.recommendation.Rating; 12 import scala.Tuple2; 13 14 /** 15 * TODO 16 * 17 * @ClassName: example 18 * @author: DingH 19 * @since: 2019/4/10 16:03 20 */ 21 public class example { 22 public static void main(String[] args) { 23 SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example"); 24 JavaSparkContext jsc = new JavaSparkContext(conf); 25 26 // Load and parse the data 27 String path = "D:\\IdeaProjects\\SimpleApp\\src\\main\\resources\\data\\mllib\\als\\test.data"; 28 JavaRDD<String> data = jsc.textFile(path); 29 JavaRDD<Rating> ratings = data.map(new Function<String, Rating>() { 30 public Rating call(String s) { 31 String[] sarray = s.split(","); 32 return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]), Double.parseDouble(sarray[2])); 33 } 34 } 35 ); 36 int ranks = 10; 37 int numIterations = 10; 38 MatrixFactorizationModel model = ALS.train(ratings.rdd(), ranks, numIterations); 39 40 JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(new Function<Rating, Tuple2<Object, Object>>() { 41 public Tuple2<Object, Object> call(Rating r) { 42 return new Tuple2<Object, Object>(r.user(), r.product()); 43 } 44 } 45 ); 46 JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map( 47 new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { 48 public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){ 49 return new Tuple2<Tuple2<Integer, Integer>, 
Double>( 50 new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); 51 } 52 } 53 )); 54 55 JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD(ratings.map( 56 new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() { 57 public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){ 58 return new Tuple2<Tuple2<Integer, Integer>, Double>( 59 new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating()); 60 } 61 } 62 )).join(predictions).values(); 63 64 double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map( 65 new Function<Tuple2<Double, Double>, Object>() { 66 public Object call(Tuple2<Double, Double> pair) { 67 Double err = pair._1() - pair._2(); 68 return err * err; 69 } 70 } 71 ).rdd()).mean(); 72 73 System.out.println("Mean Squared Error = " + MSE); 74 75 76 77 78 } 79 }
分类:
Spark
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)