You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2022/06/04 20:35:51 UTC
[systemds] branch main updated: [SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 21c39c2489 [SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function
21c39c2489 is described below
commit 21c39c2489e60218f91ca1aef532262737a62117
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Jun 4 22:35:06 2022 +0200
[SYSTEMDS-3384] Fix sparsity handling in shortestPath built-in function
So far, the shortestPath builtin function densified the input graph
by replacing zeros with Inf and then performing the dense operation
colMins(G + minDistVector) repeatedly. Instead, we now use a specific
colMins-colMax transform which keeps the large sparse graphs in sparse
form. On a small-ish co-author graph of 32K authors, computing the
shortest paths of author ID=1 to all other authors, improved from
126s to 1s end to end (for 13 iterations).
---
scripts/builtin/shortestPath.dml | 74 ++++++++++------------
.../builtin/part2/BuiltinShortestPathTest.java | 8 +--
2 files changed, 37 insertions(+), 45 deletions(-)
diff --git a/scripts/builtin/shortestPath.dml b/scripts/builtin/shortestPath.dml
index 4aaf6304cd..abbdd1c133 100644
--- a/scripts/builtin/shortestPath.dml
+++ b/scripts/builtin/shortestPath.dml
@@ -23,76 +23,70 @@
#
# Grzegorz Malewicz, Matthew H. Austern, Aart J. C. Bilk,
# James C. Dehnert, Ikkan Horn, Naty Leiser and Grzegorz Czajkowski:
-# Pregel: A System for Large-Scale Graph Processing
+# Pregel: A System for Large-Scale Graph Processing, SIGMOD 2010
#
# INPUT PARAMETERS:
# ----------------------------------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ----------------------------------------------------------------------------------------------------------------------
# G Matrix[Double] --- adjacency matrix of the labeled graph: Such graph can be directed
-# (G is symmetric) or undirected (G is not symmetric).
-# The values of G can be 0/1 (just specifying whether the nodes
-# are connected or not) or integer values (representing the weight
-# of the edges or the distances between nodes, 0 if not connected).
+# (G is symmetric) or undirected (G is not symmetric).
+# The values of G can be 0/1 (just specifying whether the nodes
+# are connected or not) or integer values (representing the weight
+# of the edges or the distances between nodes, 0 if not connected).
# maxi Integer 0 Integer max number of iterations accepted (0 for FALSE, i.e.
-# max number of iterations not defined)
-# sourceNode Integer node index to calculate the shortest paths to all other nodes.
+# max number of iterations not defined)
+# sourceNode Integer node index to calculate the shortest paths to all other nodes.
# verbose Boolean FALSE flag for verbose debug output
#
# ----------------------------------------------------------------------------------------------------------------------
#
# OUTPUT:
# ----------------------------------------------------------------------------------------------------------------------
-# NAME TYPE MEANING
+# NAME TYPE MEANING
# ----------------------------------------------------------------------------------------------------------------------
-# C Matrix[Double] Output matrix (double) of minimum distances (shortest-path) between
-# vertices: The value of the ith row and the jth column of the output
-# matrix is the minimum distance shortest-path from vertex i to vertex j.
-# When the value of the minimum distance is infinity, the two nodes are
-# not connected.
+# C Matrix[Double] Output matrix (double) of minimum distances (shortest-path) between
+# vertices: The value of the ith row and the jth column of the output
+# matrix is the minimum distance shortest-path from vertex i to vertex j.
+# When the value of the minimum distance is infinity, the two nodes are
+# not connected.
# ----------------------------------------------------------------------------------------------------------------------
m_shortestPath = function(Matrix[Double] G, Integer maxi = 0, Integer sourceNode, Boolean verbose = FALSE)
return (Matrix[Double] C)
{
- if(verbose) {
- print("SHORTEST PATH CALCULATION");
- }
-
- if(min(G) < 0){
- stop("All values in G must be positive")
- }
+ if(verbose)
+ print("SHORTEST PATH CALCULATION:");
- if(ncol(G) != nrow(G)){
- stop("Not correct matrix dimensions")
- }
-
- matrixSize = nrow(G)
-
- G = replace(target=G, pattern=0, replacement=Inf)
+ if(min(G) < 0)
+ stop("Shortest Path: All values in G must be positive.")
+ if(ncol(G) != nrow(G))
+ stop("Shortest Path: input graph G must be a squared matrix.")
# initialize the matrix of minimum distances with "infinity" values:
- minDistVector = matrix(Inf,rows=matrixSize,cols=1)
+ minDist = matrix(Inf, rows=nrow(G), cols=1);
# update minimum distance from the sourceNode to itself to 0:
- minDistVector[sourceNode,1] = 0
+ minDist[sourceNode, 1] = 0;
iter = 1
diff = Inf;
while( diff > 0 & (maxi==0 | iter<=maxi) ) {
- w = t(colMins(G + minDistVector))
- u = min(w, minDistVector);
- diff = sum(u != minDistVector)
- minDistVector = u; # update assignment
- if( verbose ){
+ # avoid densification of 'colMins(G + minDist)' via colMin-colMax transform
+ # (we exploit here that ^x with x!=0 is treated as sparse-safe and otherwise
+ # Inf or NaN and replaced with 0, which keeps large sparse graphs sparse)
+ Gp = (G + (G!=0) * minDist)^(-1);
+ Gp = replace(target=Gp, pattern=Inf, replacement=0);
+ Gp = replace(target=Gp, pattern=NaN, replacement=0);
+ u = min(t(1/colMaxs(Gp)), minDist);
+ diff = sum(u != minDist);
+ minDist = u; # update assignment
+ if( verbose )
print("Shortest Path: iter = "+iter+", #diff = "+diff);
- }
iter = iter + 1;
}
- C=minDistVector
- if(verbose) {
- print("SHORTEST PATH CALCULATION FINISHED, CHECK OUTPUT MATRIX OF MINIMUM DISTANCES:");
- print(toString(C))
- }
+ C = minDist;
+ if(verbose)
+ print("Shortest Path: \n"+toString(C));
}
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
index 618e39b229..aadcf794b2 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinShortestPathTest.java
@@ -33,7 +33,7 @@ public class BuiltinShortestPathTest extends AutomatedTestBase {
private final static String TEST_DIR = "functions/builtin/";
private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinShortestPathTest.class.getSimpleName() + "/";
private final static double eps = 1e-10;
-
+
@Override
public void setUp() {
TestUtils.clearAssertionInformation();
@@ -44,18 +44,16 @@ public class BuiltinShortestPathTest extends AutomatedTestBase {
public void testShortestPathNode1CP() {
runShortestPathNodeTest(1, new double[][] {{0}, {2}, {5}, {5}});
}
-
+
@Test
public void testShortestPathNode2CP() {
runShortestPathNodeTest(2, new double[][] {{1}, {0}, {4}, {5}});
}
-
+
@Test
public void testShortestPathNode3CP() {
runShortestPathNodeTest(3, new double[][] {{4}, {3}, {0}, {1}});
}
-
-
private void runShortestPathNodeTest(int node, double [][] Res) {
loadTestConfiguration(getTestConfiguration(TEST_NAME));