You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/01/14 07:59:57 UTC

[33/50] git commit: Updated doc for PageRank.

Updated doc for PageRank.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0b18bfba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0b18bfba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0b18bfba

Branch: refs/heads/master
Commit: 0b18bfba1aba60c2a1f576f10d9ab2fa316ebfa0
Parents: 9317286
Author: Reynold Xin <rx...@apache.org>
Authored: Mon Jan 13 18:51:04 2014 -0800
Committer: Reynold Xin <rx...@apache.org>
Committed: Mon Jan 13 18:51:04 2014 -0800

----------------------------------------------------------------------
 .../org/apache/spark/graphx/lib/PageRank.scala  | 86 +++++++++-----------
 1 file changed, 39 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0b18bfba/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
----------------------------------------------------------------------
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index b205669..08256dc 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -5,7 +5,42 @@ import scala.reflect.ClassTag
 import org.apache.spark.Logging
 import org.apache.spark.graphx._
 
-/** PageRank algorithm implementation. */
+/**
+ * PageRank algorithm implementation. There are two implementations of PageRank implemented.
+ *
+ * The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number
+ * of iterations:
+ * {{{
+ * var PR = Array.fill(n)( 1.0 )
+ * val oldPR = Array.fill(n)( 1.0 )
+ * for( iter <- 0 until numIter ) {
+ *   swap(oldPR, PR)
+ *   for( i <- 0 until n ) {
+ *     PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
+ *   }
+ * }
+ * }}}
+ *
+ * The second implementation uses the standalone [[Graph]] interface and runs PageRank until
+ * convergence:
+ *
+ * {{{
+ * var PR = Array.fill(n)( 1.0 )
+ * val oldPR = Array.fill(n)( 0.0 )
+ * while( max(abs(PR - oldPr)) > tol ) {
+ *   swap(oldPR, PR)
+ *   for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
+ *     PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
+ *   }
+ * }
+ * }}}
+ *
+ * `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
+ * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
+ *
+ * Note that this is not the "normalized" PageRank and as a consequence pages that have no
+ * inlinks will have a PageRank of alpha.
+ */
 object PageRank extends Logging {
 
   /**
@@ -13,26 +48,6 @@ object PageRank extends Logging {
    * with vertex attributes containing the PageRank and edge
    * attributes the normalized edge weight.
    *
-   * The following PageRank fixed point is computed for each vertex.
-   *
-   * {{{
-   * var PR = Array.fill(n)( 1.0 )
-   * val oldPR = Array.fill(n)( 1.0 )
-   * for( iter <- 0 until numIter ) {
-   *   swap(oldPR, PR)
-   *   for( i <- 0 until n ) {
-   *     PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
-   *   }
-   * }
-   * }}}
-   *
-   * where `alpha` is the random reset probability (typically 0.15),
-   * `inNbrs[i]` is the set of neighbors whick link to `i` and
-   * `outDeg[j]` is the out degree of vertex `j`.
-   *
-   * Note that this is not the "normalized" PageRank and as a consequence pages that have no
-   * inlinks will have a PageRank of alpha.
-   *
    * @tparam VD the original vertex attribute (not used)
    * @tparam ED the original edge attribute (not used)
    *
@@ -47,16 +62,11 @@ object PageRank extends Logging {
   def run[VD: ClassTag, ED: ClassTag](
       graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] =
   {
-
-    /**
-     * Initialize the pagerankGraph with each edge attribute having
-     * weight 1/outDegree and each vertex with attribute 1.0.
-     */
+    // Initialize the pagerankGraph with each edge attribute having
+    // weight 1/outDegree and each vertex with attribute 1.0.
     val pagerankGraph: Graph[Double, Double] = graph
       // Associate the degree with each vertex
-      .outerJoinVertices(graph.outDegrees){
-      (vid, vdata, deg) => deg.getOrElse(0)
-    }
+      .outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
       // Set the weight on the edges based on the degree
       .mapTriplets( e => 1.0 / e.srcAttr )
       // Set the vertex attributes to the initial pagerank values
@@ -85,23 +95,6 @@ object PageRank extends Logging {
    * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
    * PageRank and edge attributes containing the normalized edge weight.
    *
-   * {{{
-   * var PR = Array.fill(n)( 1.0 )
-   * val oldPR = Array.fill(n)( 0.0 )
-   * while( max(abs(PR - oldPr)) > tol ) {
-   *   swap(oldPR, PR)
-   *   for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
-   *     PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
-   *   }
-   * }
-   * }}}
-   *
-   * where `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
-   * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
-   *
-   * Note that this is not the "normalized" PageRank and as a consequence pages that have no
-   * inlinks will have a PageRank of alpha.
-   *
    * @tparam VD the original vertex attribute (not used)
    * @tparam ED the original edge attribute (not used)
    *
@@ -157,5 +150,4 @@ object PageRank extends Logging {
       vertexProgram, sendMessage, messageCombiner)
       .mapVertices((vid, attr) => attr._1)
   } // end of deltaPageRank
-
 }