Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/14 07:59:33 UTC

[09/50] git commit: Move algorithms to GraphOps

Move algorithms to GraphOps


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/d691e9f4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/d691e9f4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/d691e9f4

Branch: refs/heads/master
Commit: d691e9f47ed9b43b422712047183142d01c5e8c2
Parents: 20c509b
Author: Ankur Dave <an...@gmail.com>
Authored: Sun Jan 12 21:47:16 2014 -0800
Committer: Ankur Dave <an...@gmail.com>
Committed: Sun Jan 12 21:47:16 2014 -0800

----------------------------------------------------------------------
 docs/graphx-programming-guide.md                | 12 +---
 .../scala/org/apache/spark/graphx/Graph.scala   |  4 +-
 .../org/apache/spark/graphx/GraphOps.scala      | 51 ++++++++++++++-
 .../apache/spark/graphx/lib/Algorithms.scala    | 66 --------------------
 .../org/apache/spark/graphx/lib/package.scala   |  8 ---
 5 files changed, 54 insertions(+), 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d691e9f4/docs/graphx-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 0e228d8..572afc1 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -667,9 +667,7 @@ things to worry about.)
 # Graph Algorithms
 <a name="graph_algorithms"></a>
 
-GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via an implicit conversion to [`Algorithms`][Algorithms]. This section describes the algorithms and how they are used.
-
-[Algorithms]: api/graphx/index.html#org.apache.spark.graphx.lib.Algorithms
+GraphX includes a set of graph algorithms to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used.
 
 ## PageRank
 <a name="pagerank"></a>
@@ -681,8 +679,6 @@ GraphX comes with static and dynamic implementations of PageRank as methods on t
 [PageRank]: api/graphx/index.html#org.apache.spark.graphx.lib.PageRank$
 
 {% highlight scala %}
-// Load the implicit conversion to Algorithms
-import org.apache.spark.graphx.lib._
 // Load the datasets into a graph
 val users = sc.textFile("graphx/data/users.txt").map { line =>
   val fields = line.split("\\s+")
@@ -710,8 +706,7 @@ The connected components algorithm labels each connected component of the graph
 [ConnectedComponents]: api/graphx/index.html#org.apache.spark.graphx.lib.ConnectedComponents$
 
 {% highlight scala %}
-// Load the implicit conversion and graph as in the PageRank example
-import org.apache.spark.graphx.lib._
+// Load the graph as in the PageRank example
 val users = ...
 val followers = ...
 val graph = Graph(users, followers)
@@ -733,8 +728,7 @@ A vertex is part of a triangle when it has two adjacent vertices with an edge be
 [Graph.partitionBy]: api/graphx/index.html#org.apache.spark.graphx.Graph@partitionBy(PartitionStrategy):Graph[VD,ED]
 
 {% highlight scala %}
-// Load the implicit conversion and graph as in the PageRank example
-import org.apache.spark.graphx.lib._
+// Load the graph as in the PageRank example
 val users = ...
 // Load the edges in canonical order and partition the graph for triangle count
 val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", true).partitionBy(RandomVertexCut)
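
For reference, a minimal sketch of the usage the guide now documents: no org.apache.spark.graphx.lib._ import, with pageRank and staticPageRank called directly on the graph. It assumes a Spark shell where sc is already defined; the edge list path matches the guide's sample data, and the parameter values are illustrative.

    import org.apache.spark.graphx._

    // Build the follower graph from the guide's sample edge list, keeping the
    // original edge directions (canonicalOrientation = false).
    val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", false)

    // Dynamic PageRank: iterate until the per-vertex change falls below the tolerance.
    val ranks = graph.pageRank(0.0001).vertices

    // Fixed-iteration variant, now exposed as staticPageRank.
    val staticRanks = graph.staticPageRank(20).vertices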

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d691e9f4/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 56513ca..7d4f0de 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -15,9 +15,7 @@ import org.apache.spark.storage.StorageLevel
  * RDDs, the graph is a functional data-structure in which mutating
  * operations return new graphs.
  *
- * @note [[GraphOps]] contains additional convenience operations.
- * [[lib.Algorithms]] contains graph algorithms; to access these,
- * import `org.apache.spark.graphx.lib._`.
+ * @note [[GraphOps]] contains additional convenience operations and graph algorithms.
  *
  * @tparam VD the vertex attribute type
  * @tparam ED the edge attribute type

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d691e9f4/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 4fdff29..2b3b95e 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -2,9 +2,10 @@ package org.apache.spark.graphx
 
 import scala.reflect.ClassTag
 
-import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 import org.apache.spark.SparkException
+import org.apache.spark.graphx.lib._
+import org.apache.spark.rdd.RDD
 
 /**
  * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
@@ -298,4 +299,52 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) {
     Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg)
   }
 
+  /**
+   * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
+   * PageRank and edge attributes containing the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`.
+   */
+  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.runUntilConvergence(graph, tol, resetProb)
+  }
+
+  /**
+   * Run PageRank for a fixed number of iterations returning a graph with vertex attributes
+   * containing the PageRank and edge attributes containing the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`.
+   */
+  def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.run(graph, numIter, resetProb)
+  }
+
+  /**
+   * Compute the connected component membership of each vertex and return a graph with the vertex
+   * value containing the lowest vertex id in the connected component containing that vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.ConnectedComponents]]
+   */
+  def connectedComponents(): Graph[VertexID, ED] = {
+    ConnectedComponents.run(graph)
+  }
+
+  /**
+   * Compute the number of triangles passing through each vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.TriangleCount]]
+   */
+  def triangleCount(): Graph[Int, ED] = {
+    TriangleCount.run(graph)
+  }
+
+  /**
+   * Compute the strongly connected component (SCC) of each vertex and return a graph with the
+   * vertex value containing the lowest vertex id in the SCC containing that vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]]
+   */
+  def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = {
+    StronglyConnectedComponents.run(graph, numIter)
+  }
 } // end of GraphOps
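
With these wrappers in place, the algorithms above become plain method calls on any graph. A minimal sketch of the resulting call sites, assuming a Spark shell where sc is defined; the load-and-partition line mirrors the guide's triangle-count example, which expects canonically ordered, partitioned edges, and the iteration count is illustrative.

    import org.apache.spark.graphx._

    // Load the edges in canonical order and partition the graph, as in the
    // guide's triangle-count example.
    val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", true).partitionBy(RandomVertexCut)

    // Lowest vertex id in each connected component.
    val components = graph.connectedComponents().vertices

    // Number of triangles passing through each vertex.
    val triangles = graph.triangleCount().vertices

    // Strongly connected component membership after 5 iterations.
    val sccIds = graph.stronglyConnectedComponents(5).vertices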

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d691e9f4/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala
deleted file mode 100644
index cbcd9c2..0000000
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-package org.apache.spark.graphx.lib
-
-import scala.reflect.ClassTag
-
-import org.apache.spark.graphx._
-
-/**
- * Provides graph algorithms directly on [[org.apache.spark.graphx.Graph]] via an implicit
- * conversion.
- * @example
- * {{{
- * import org.apache.spark.graph.lib._
- * val graph: Graph[_, _] = loadGraph()
- * graph.connectedComponents()
- * }}}
- */
-class Algorithms[VD: ClassTag, ED: ClassTag](self: Graph[VD, ED]) {
-  /**
-   * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
-   * PageRank and edge attributes containing the normalized edge weight.
-   *
-   * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`.
-   */
-  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
-    PageRank.runUntilConvergence(self, tol, resetProb)
-  }
-
-  /**
-   * Run PageRank for a fixed number of iterations returning a graph with vertex attributes
-   * containing the PageRank and edge attributes the normalized edge weight.
-   *
-   * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`.
-   */
-  def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
-    PageRank.run(self, numIter, resetProb)
-  }
-
-  /**
-   * Compute the connected component membership of each vertex and return a graph with the vertex
-   * value containing the lowest vertex id in the connected component containing that vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.ConnectedComponents]]
-   */
-  def connectedComponents(): Graph[VertexID, ED] = {
-    ConnectedComponents.run(self)
-  }
-
-  /**
-   * Compute the number of triangles passing through each vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.TriangleCount]]
-   */
-  def triangleCount(): Graph[Int, ED] = {
-    TriangleCount.run(self)
-  }
-
-  /**
-   * Compute the strongly connected component (SCC) of each vertex and return a graph with the
-   * vertex value containing the lowest vertex id in the SCC containing that vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]]
-   */
-  def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = {
-    StronglyConnectedComponents.run(self, numIter)
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/d691e9f4/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala
deleted file mode 100644
index f6f2626..0000000
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala
+++ /dev/null
@@ -1,8 +0,0 @@
-package org.apache.spark.graphx
-
-import scala.reflect.ClassTag
-
-package object lib {
-  implicit def graphToAlgorithms[VD: ClassTag, ED: ClassTag](
-      graph: Graph[VD, ED]): Algorithms[VD, ED] = new Algorithms(graph)
-}