subgraph示例

import org.apache.spark._
import org.apache.spark.graphx._

import org.apache.spark.rdd.RDD
// Vertex records: vertexId -> (user name, role).
val users: RDD[(VertexId, (String, String))] = sc.parallelize(
    Seq(
        3L -> ("rxin", "student"),
        7L -> ("jgonzal", "postdoc"),
        5L -> ("franklin", "prof"),
        2L -> ("istoica", "prof"),
        4L -> ("peter", "student")
    )
)
users = ParallelCollectionRDD[62] at parallelize at <console>:49






ParallelCollectionRDD[62] at parallelize at <console>:49
// Edge records as (source vertex id, destination vertex id, relationship label).
// Two edges point at vertex id 0, which has no entry in the `users` data, so
// the graph built from these edges needs a default vertex attribute for it.
val relationships: RDD[Edge[String]] = sc.parallelize(
    Seq(
        (3L, 7L, "collab"),
        (5L, 3L, "advisor"),
        (2L, 5L, "colleague"),
        (4L, 0L, "student"),
        (5L, 0L, "colleague")
    ).map { case (src, dst, relation) => Edge(src, dst, relation) }
)
relationships = ParallelCollectionRDD[63] at parallelize at <console>:49






ParallelCollectionRDD[63] at parallelize at <console>:49
// Placeholder (name, role) attribute, passed to Graph(...) as the default vertex attribute.
val defaultUser: (String, String) = "John Doe" -> "Missing"
defaultUser = (John Doe,Missing)






(John Doe,Missing)
// Assemble the property graph; vertices that appear only in edges (id 0 here)
// receive `defaultUser` as their attribute.
val graph: Graph[(String, String), String] = Graph(
    vertices = users,
    edges = relationships,
    defaultVertexAttr = defaultUser
)
graph = org.apache.spark.graphx.impl.GraphImpl@718b0840






org.apache.spark.graphx.impl.GraphImpl@718b0840
// Bring every vertex back to the driver and print one per line.
for (vertex <- graph.vertices.collect()) println(vertex)
(0,(John Doe,Missing))
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// Render each edge together with the names of its two endpoint vertices.
val graphSentences = graph.triplets.map { t =>
    s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
}
for (sentence <- graphSentences.collect()) println(sentence)
rxin is the collab of jgonzal
franklin is the advisor of rxin
istoica is the colleague of franklin
peter is the student of John Doe
franklin is the colleague of John Doe

去除Missing属性的节点

// Keep only vertices whose role (second attribute field) is not "Missing";
// edges touching a removed vertex are dropped from the subgraph as well.
val removeMissingGraph = graph.subgraph(
    vpred = (_, userAttr) => userAttr._2 != "Missing"
)
removeMissingGraph = org.apache.spark.graphx.impl.GraphImpl@21974f8a






org.apache.spark.graphx.impl.GraphImpl@21974f8a
// Print the vertices that survived the "Missing" filter.
for (vertex <- removeMissingGraph.vertices.collect()) println(vertex)
(2,(istoica,prof))
(3,(rxin,student))
(4,(peter,student))
(5,(franklin,prof))
(7,(jgonzal,postdoc))
// Print the remaining relationships as "<source> is the <relation> of <destination>".
val remainingSentences = removeMissingGraph.triplets.map { t =>
    s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
}
for (sentence <- remainingSentences.collect()) println(sentence)
rxin is the collab of jgonzal
franklin is the advisor of rxin
istoica is the colleague of franklin

去除prof属性的节点

// Keep only vertices whose role (second attribute field) is not "prof".
val removeProfGraph = graph.subgraph(
    vpred = (_, userAttr) => userAttr._2 != "prof"
)
// Print the vertices that remain after the filter.
for (vertex <- removeProfGraph.vertices.collect()) println(vertex)
(0,(John Doe,Missing))
(3,(rxin,student))
(4,(peter,student))
(7,(jgonzal,postdoc))



removeProfGraph = org.apache.spark.graphx.impl.GraphImpl@4ed527a2






org.apache.spark.graphx.impl.GraphImpl@4ed527a2
// Print the relationships left once the "prof" vertices are removed.
val nonProfSentences = removeProfGraph.triplets.map { t =>
    s"${t.srcAttr._1} is the ${t.attr} of ${t.dstAttr._1}"
}
for (sentence <- nonProfSentences.collect()) println(sentence)
rxin is the collab of jgonzal
peter is the student of John Doe

posted @ 2018-12-20 11:40  DataNerd  阅读(478)  评论(0编辑  收藏  举报