GraphX Example 1

Triplets example: student–professor relationships

import org.apache.spark._
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

// GraphX triplets example, intended to be pasted into spark-shell:
// `sc` below is the SparkContext that the shell provides.

// Create an RDD for the vertices: (vertex id, (user name, role)).
val users: RDD[(VertexId, (String, String))] =
  sc.parallelize(Seq(
    (3L, ("rxin", "student")),
    (7L, ("jgonzal", "postdoc")),
    (5L, ("franklin", "prof")),
    (2L, ("istoica", "prof")),
    (4L, ("peter", "student"))
  ))

// Create an RDD for the edges: Edge(source id, destination id, relationship).
// Vertex 0 is referenced by two edges but is deliberately absent from `users`.
val relationships: RDD[Edge[String]] =
  sc.parallelize(Seq(
    Edge(3L, 7L, "collab"),
    Edge(5L, 3L, "advisor"),
    Edge(2L, 5L, "colleague"),
    Edge(5L, 7L, "pi"),
    Edge(4L, 0L, "student"),
    Edge(5L, 0L, "colleague")
  ))

// Default attribute for any vertex that appears in an edge but not in `users`.
val defaultUser = ("John Doe", "Missing")

// Build the initial graph.
// Note: `Graph` is an abstract class, so it cannot be instantiated with `new`.
// The call below has no `new` keyword because it invokes the `apply` factory
// method on the `Graph` companion object, which returns a concrete graph.
val graph = Graph(users, relationships, defaultUser)

// Notice that there is a user 0 (for which we have no information) connected
// to users 4 (peter) and 5 (franklin); it shows up below as "John Doe".
graph.triplets
  .map(triplet => s"${triplet.srcAttr._1} is the ${triplet.attr} of ${triplet.dstAttr._1}")
  .collect()
  .foreach(println)

// Remove the missing vertices as well as the edges connected to them.
val validGraph = graph.subgraph(vpred = (_, attr) => attr._2 != "Missing")

// The valid subgraph drops user 0, and with it the edges 4 -> 0 and 5 -> 0.
validGraph.vertices.collect().foreach(println)
validGraph.triplets
  .map(triplet => s"${triplet.srcAttr._1} is the ${triplet.attr} of ${triplet.dstAttr._1}")
  .collect()
  .foreach(println)

/*

Output (line order presumably depends on partitioning and may differ per run):
istoica is the colleague of franklin
rxin is the collab of jgonzal
franklin is the advisor of rxin
peter is the student of John Doe
franklin is the colleague of John Doe
franklin is the pi of jgonzal

(4,(peter,student))
(2,(istoica,prof))
(3,(rxin,student))
(7,(jgonzal,postdoc))
(5,(franklin,prof))

istoica is the colleague of franklin
rxin is the collab of jgonzal
franklin is the advisor of rxin
franklin is the pi of jgonzal
*/
Copied!
​
Last modified 1yr ago
Copy link