neo4j-(12)-Spark operations
Create the object
First add the connector dependency to the Maven POM:
<dependencies>
<!-- The RDD-based API used below (Neo4j(sc), loadRowRdd, ...) is provided by the
     older neo4j-contrib connector; the 4.x DataSource connector exposes a different API. -->
<dependency>
  <groupId>neo4j-contrib</groupId>
  <artifactId>neo4j-spark-connector</artifactId>
  <version>2.4.5-M2</version>
</dependency>
</dependencies>
<repositories>
<!-- list of other repositories -->
<repository>
<id>SparkPackagesRepo</id>
<url>http://dl.bintray.com/spark-packages/maven</url>
</repository>
</repositories>
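If the project uses sbt instead of Maven, an equivalent setup would be the following sketch (same repository as above; the coordinates and version are the ones assumed in the POM):
// build.sbt: same connector and repository as the Maven snippet above.
resolvers += "SparkPackagesRepo" at "http://dl.bintray.com/spark-packages/maven"
libraryDependencies += "neo4j-contrib" % "neo4j-spark-connector" % "2.4.5-M2"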
Configuration via SparkSession
import org.apache.spark.sql.SparkSession
import org.neo4j.spark._

val spark = SparkSession.builder().appName("play")
  .master("local[*]")
  .config("spark.neo4j.bolt.url", "bolt://localhost:7687")
  .config("spark.neo4j.bolt.user", "neo4j")
  .config("spark.neo4j.bolt.password", "hortmt")
  .getOrCreate()
import spark.implicits._
val neo = Neo4j(spark.sparkContext)
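As a quick sanity check (a sketch that assumes the Bolt settings above point at a running server), count the nodes in the graph:
// Smoke test: run a trivial Cypher query through the connector.
val total = neo.cypher("MATCH (n) RETURN count(*) AS c").loadRowRdd.first.getLong(0)
println(s"nodes in graph: $total")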
Configuration via SparkConf
import org.apache.spark.{SparkConf, SparkContext}
import org.neo4j.spark._

val conf = new SparkConf().setAppName("neo4j")
  .setMaster("local[*]")
  .set("spark.neo4j.bolt.url", "bolt://localhost:7687")
  .set("spark.neo4j.bolt.user", "neo4j")
  .set("spark.neo4j.bolt.password", "hortmt")
val sc = new SparkContext(conf)
val neo = Neo4j(sc)
Configuration via Neo4jConfig
import org.apache.spark.sql.SparkSession
import org.neo4j.spark._

val sparkSession = SparkSession.builder()
  .master("local[*]")
  .appName("LoadDataToNeo4j")
  .getOrCreate()
val sc = sparkSession.sparkContext
// Neo4jConfig(url, user, password); the original "localhost:" lacked the bolt scheme and port.
val config = Neo4jConfig("bolt://localhost:7687", "neo4j", Some("root"))
// Note: Neo4j(sc) reads the spark.neo4j.bolt.* settings from the SparkConf, so they must be
// set there as well; RDDs are lazy, so an action (count) is needed for the CREATE to execute.
Neo4j(sc).cypher("CREATE (c:Client {id:1230}) RETURN c").loadRowRdd.count
sparkSession.close()
Return types
Choose the data type you want back:
loadRowRdd, loadNodeRdds, loadRelRdd, loadRdd[T]
loadDataFrame, loadDataFrame(schema) (see the loadDataFrame sketch after the loadRowRdd example below)
loadGraph[VD,ED]
loadGraphFrame[VD,ED]
Usage
loadRowRdd
import org.neo4j.spark._
val neo = Neo4j(sc)
val rdd = neo.cypher("MATCH (n:Person) RETURN id(n) as id ").loadRowRdd
rdd.count
// inferred schema
rdd.first.schema.fieldNames
// => ["id"]
rdd.first.schema("id")
// => StructField(id,LongType,true)
neo.cypher("MATCH (n:Person) RETURN id(n)").loadRdd[Long].mean
// => res30: Double = 236696.5
neo.cypher("MATCH (n:Person) WHERE n.id <= {maxId} RETURN n.id").param("maxId", 10).loadRowRdd.count
// => res34: Long = 10
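loadDataFrame
The list of return types above also includes loadDataFrame; a minimal sketch, assuming the same Person nodes and that they carry a name property (the property name is an assumption):
import org.neo4j.spark._
val neo = Neo4j(sc)
// loadDataFrame infers the DataFrame schema from the first result row.
val df = neo.cypher("MATCH (n:Person) RETURN id(n) as id, n.name as name").loadDataFrame
df.printSchema()
df.count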
loadGraphFrame
import org.neo4j.spark._
val neo = Neo4j(sc)
import org.graphframes._
val graphFrame = neo.pattern(("Person","id"),("KNOWS",null), ("Person","id")).partitions(3).rows(1000).loadGraphFrame
graphFrame.vertices.count
// => 100
graphFrame.edges.count
// => 1000
val pageRankFrame = graphFrame.pageRank.maxIter(5).run()
val ranked = pageRankFrame.vertices
ranked.printSchema()
val top3 = ranked.orderBy(ranked.col("pagerank").desc).take(3)
// => top3: Array[org.apache.spark.sql.Row]
// => Array([236716,70,0.62285...], [236653,7,0.62285...], [236658,12,0.62285])
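loadGraph
For GraphX rather than GraphFrames there is loadGraph; a minimal sketch close to the connector's documented GraphX example, assuming the same Person/KNOWS data as above:
import org.neo4j.spark._
import org.apache.spark.graphx._
import org.apache.spark.graphx.lib._
val neo = Neo4j(sc)
// Relationship query; {_skip}/{_limit} are filled in by the connector per partition.
val graphQuery = "MATCH (n:Person)-[r:KNOWS]->(m:Person) RETURN id(n) as source, id(m) as target, type(r) as value SKIP {_skip} LIMIT {_limit}"
val graph: Graph[Long, String] = neo.rels(graphQuery).partitions(7).batch(200).loadGraph
graph.vertices.count
graph.edges.count
// GraphX PageRank over the loaded graph (5 iterations), then the top 3 vertices by rank.
val ranked = PageRank.run(graph, 5)
ranked.vertices.sortBy(_._2, ascending = false).take(3)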