You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2022/06/04 20:35:51 UTC

[systemds] branch main updated: [SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 21c39c2489 [SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function
21c39c2489 is described below

commit 21c39c2489e60218f91ca1aef532262737a62117
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Jun 4 22:35:06 2022 +0200

    [SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function
    
    So far, the shortestPath builtin function densified the input graph
    by replacing zeros with Inf and then performing the dense operation
    colMins(G + minDistVector) repeatedly. Instead, we now use a specific
    colMins-colMax transform which keeps the large sparse graphs in sparse
    form. On a small-ish co-author graph of 32K authors, computing the
    shortest paths of author ID=1 to all other authors, improved from
    126s to 1s end to end (for 13 iterations).
---
 scripts/builtin/shortestPath.dml                   | 74 ++++++++++------------
 .../builtin/part2/BuiltinShortestPathTest.java     |  8 +--
 2 files changed, 37 insertions(+), 45 deletions(-)

diff --git a/scripts/builtin/shortestPath.dml b/scripts/builtin/shortestPath.dml
index 4aaf6304cd..abbdd1c133 100644
--- a/scripts/builtin/shortestPath.dml
+++ b/scripts/builtin/shortestPath.dml
@@ -23,76 +23,70 @@
 # 
 # Grzegorz Malewicz, Matthew H. Austern, Aart J. C. Bilk, 
 # James C. Dehnert, Ikkan Horn, Naty Leiser and Grzegorz Czajkowski:
-# Pregel: A System for Large-Scale Graph Processing
+# Pregel: A System for Large-Scale Graph Processing, SIGMOD 2010
 #
 # INPUT PARAMETERS:
 # ----------------------------------------------------------------------------------------------------------------------
 # NAME        TYPE              DEFAULT  MEANING
 # ----------------------------------------------------------------------------------------------------------------------
 # G           Matrix[Double]    ---      adjacency matrix of the labeled graph: Such graph can be directed
-#                                         (G is symmetric) or undirected (G is not symmetric).
-#                                         The values of G can be 0/1 (just specifying whether the nodes
-#                                         are connected or not) or integer values (representing the weight
-#                                         of the edges or the distances between nodes, 0 if not connected).
+#                                        (G is symmetric) or undirected (G is not symmetric).
+#                                        The values of G can be 0/1 (just specifying whether the nodes
+#                                        are connected or not) or integer values (representing the weight
+#                                        of the edges or the distances between nodes, 0 if not connected).
 # maxi        Integer           0        Integer max number of iterations accepted (0 for FALSE, i.e.
-#                                         max number of iterations not defined)
-# sourceNode  Integer                     node index to calculate the shortest paths to all other nodes.
+#                                        max number of iterations not defined)
+# sourceNode  Integer                    node index to calculate the shortest paths to all other nodes.
 # verbose     Boolean           FALSE    flag for verbose debug output
 #
 # ----------------------------------------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # ----------------------------------------------------------------------------------------------------------------------
-# NAME       TYPE                         MEANING
+# NAME        TYPE                       MEANING
 # ----------------------------------------------------------------------------------------------------------------------
-# C          Matrix[Double]               Output matrix (double) of minimum distances (shortest-path) between
-#                                         vertices: The value of the ith row and the jth column of the output
-#                                         matrix is the minimum distance shortest-path from vertex i to vertex j.
-#                                         When the value of the minimum distance is infinity, the two nodes are
-#                                         not connected.
+# C           Matrix[Double]             Output matrix (double) of minimum distances (shortest-path) between
+#                                        vertices: The value of the ith row and the jth column of the output
+#                                        matrix is the minimum distance shortest-path from vertex i to vertex j.
+#                                        When the value of the minimum distance is infinity, the two nodes are
+#                                        not connected.
 # ----------------------------------------------------------------------------------------------------------------------
 
 m_shortestPath = function(Matrix[Double] G, Integer maxi = 0, Integer sourceNode, Boolean verbose = FALSE) 
   return (Matrix[Double] C) 
 {
-  if(verbose) {
-    print("SHORTEST PATH CALCULATION");
-  }
-
-  if(min(G) < 0){
-    stop("All values in G must be positive")
-  }
+  if(verbose)
+    print("SHORTEST PATH CALCULATION:");
 
-  if(ncol(G) != nrow(G)){
-    stop("Not correct matrix dimensions")
-  }
-
-  matrixSize = nrow(G)
-
-  G = replace(target=G, pattern=0, replacement=Inf)
+  if(min(G) < 0)
+    stop("Shortest Path: All values in G must be positive.")
+  if(ncol(G) != nrow(G))
+    stop("Shortest Path: input graph G must be a squared matrix.")
 
   # initialize the matrix of minimum distances with "infinity" values:
-  minDistVector = matrix(Inf,rows=matrixSize,cols=1)
+  minDist = matrix(Inf, rows=nrow(G), cols=1);
 
   # update minimum distance from the sourceNode to itself to 0:
-  minDistVector[sourceNode,1] = 0
+  minDist[sourceNode, 1] = 0;
 
   iter = 1
   diff = Inf;
   while( diff > 0 & (maxi==0 | iter<=maxi) ) {
-    w = t(colMins(G + minDistVector))
-    u = min(w, minDistVector);
-    diff = sum(u != minDistVector)
-    minDistVector = u; # update assignment
-    if( verbose ){
+    # avoid densification of 'colMins(G + minDist)' via colMin-colMax transform
+    # (we exploit here that ^x with x!=0 is treated as sparse-safe and otherwise
+    # Inf or NaN and replaced with 0, which keeps large sparse graphs sparse) 
+    Gp = (G + (G!=0) * minDist)^(-1);
+    Gp = replace(target=Gp, pattern=Inf, replacement=0);
+    Gp = replace(target=Gp, pattern=NaN, replacement=0);
+    u = min(t(1/colMaxs(Gp)), minDist);
+    diff = sum(u != minDist);
+    minDist = u; # update assignment
+    if( verbose )
       print("Shortest Path: iter = "+iter+", #diff = "+diff);
-    }
     iter = iter + 1;
   }
 
-  C=minDistVector
-  if(verbose) {
-    print("SHORTEST PATH CALCULATION FINISHED, CHECK OUTPUT MATRIX OF MINIMUM DISTANCES:");
-    print(toString(C))
-  }
+  C = minDist;
+  if(verbose)
+    print("Shortest Path: \n"+toString(C));
 }
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
index 618e39b229..aadcf794b2 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
@@ -33,7 +33,7 @@ public class BuiltinShortestPathTest extends AutomatedTestBase {
 	private final static String TEST_DIR = "functions/builtin/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinShortestPathTest.class.getSimpleName() + "/";
 	private final static double eps = 1e-10;
-	
+
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
@@ -44,18 +44,16 @@ public class BuiltinShortestPathTest extends AutomatedTestBase {
 	public void testShortestPathNode1CP() {
 		runShortestPathNodeTest(1, new double[][] {{0}, {2}, {5}, {5}});
 	}
-	
+
 	@Test
 	public void testShortestPathNode2CP() {
 		runShortestPathNodeTest(2, new double[][] {{1}, {0}, {4}, {5}});
 	}
-	
+
 	@Test
 	public void testShortestPathNode3CP() {
 		runShortestPathNodeTest(3, new double[][] {{4}, {3}, {0}, {1}});
 	}
-	
-	
 
 	private void runShortestPathNodeTest(int node, double [][] Res) {
 		loadTestConfiguration(getTestConfiguration(TEST_NAME));