1. PageRank
http://blog.csdn.net/hguisu/article/details/7996185
2. Connected Components
3. Triangle Counting
例子:
users.txt
1,BarackObama,Barack Obama
2,ladygaga,Goddess of Love
3,jeresig,John Resig
4,justinbieber,Justin Bieber
6,matei_zaharia,Matei Zaharia
7,odersky,Martin Odersky
8,anonsys
followers.txt
2 1
4 1
1 2
6 3
7 3
7 6
6 7
3 7
算法实战:
package main.scala

import org.apache.spark.graphx.{GraphLoader, PartitionStrategy}
import org.apache.spark.{SparkConf, SparkContext}

/** Runs three built-in GraphX algorithms (PageRank, Connected Components and
  * Triangle Counting) on a follower graph and prints each result joined with
  * the user names read from `users.txt`.
  *
  * Input files (paths are hard-coded below):
  *  - `followers.txt`: one edge per line, "srcId dstId", whitespace separated.
  *  - `users.txt`: one user per line, "id,username[,full name]".
  */
object graphx_algorism {
  // Executed when the object is initialised, i.e. before `main` runs.
  // NOTE(review): presumably points Hadoop at a local winutils install on Windows - confirm.
  System.setProperty("hadoop.home.dir","E:/zhuangji/winutil/")

  /** Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("graph_algorism")
      .set("spark.cores.max", "10") // spark.cores.max can be used to set the number of cores
    val sc = new SparkContext(conf)

    try {
      // Initialise the graph from an edge-list file.
      val graph = GraphLoader.edgeListFile(sc, "E:/Java_WS/ScalaDemo/data/followers.txt")

      // (userId, username) pairs; any fields after the username are ignored.
      val users = sc.textFile("E:/Java_WS/ScalaDemo/data/users.txt").map { line =>
        val fields = line.split(",")
        (fields(0).toLong, fields(1))
      }

      // 1. PageRank
      // 0.001 is `tol`, the convergence tolerance: iteration continues until the
      // ranks change by less than this amount (smaller => more accurate, slower).
      val ranks = graph.pageRank(0.001).vertices
      ranks.collect().foreach(println)
      val ranksByUsername = users.join(ranks).map {
        case (_, (username, rank)) => (username, rank)
      }
      println(ranksByUsername.collect().mkString("\n"))

      // 2. Connected Components
      // Each vertex is labelled with the lowest vertex id of its component.
      val cc = graph.connectedComponents().vertices
      // Print the contents; printing the Array itself would only show its identity string.
      println(cc.collect().mkString("\n"))
      val ccByUsername = users.join(cc).map {
        case (_, (username, componentId)) => (username, componentId)
      }
      println(ccByUsername.collect().mkString("\n"))

      // 3. Triangle Count
      // The edges are reloaded in canonical orientation (srcId < dstId) and the
      // graph is explicitly partitioned, as the GraphX guide does for triangleCount.
      val graphT = GraphLoader
        .edgeListFile(sc, "E:/Java_WS/ScalaDemo/data/followers.txt", true)
        .partitionBy(PartitionStrategy.RandomVertexCut)
      val triCounts = graphT.triangleCount().vertices
      val triCountByUsername = users.join(triCounts).map {
        case (_, (username, tc)) => (username, tc)
      }
      println(triCountByUsername.collect().mkString("\n"))
    } finally {
      // Release Spark resources even if one of the jobs above fails.
      sc.stop()
    }
  }
}
每天进步一点点