You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hugegraph.apache.org by ji...@apache.org on 2022/11/09 10:25:08 UTC

[incubator-hugegraph] 05/33: optimize filterNonShortestPath() for betweeness_centrality (#12)

This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph.git

commit 7a72d7df60963dac3e0a33d30bf7758abf2830ba
Author: Jermy Li <li...@baidu.com>
AuthorDate: Wed Apr 15 11:03:59 2020 +0800

    optimize filterNonShortestPath() for betweeness_centrality (#12)
    
    network10000 dataset test:
                       before  after
    (depth=4 sample=1) 395s    25s
    (depth=3 sample=2) 4300s   35s
    
    same as the closeness_centrality
    
    Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df
---
 .../hugegraph/job/algorithm/AbstractAlgorithm.java | 21 +++++++++++-----
 .../job/algorithm/cent/AbstractCentAlgorithm.java  | 29 ++++++++++++++++++++++
 .../cent/BetweenessCentralityAlgorithm.java        | 23 ++++-------------
 .../cent/ClosenessCentralityAlgorithm.java         | 23 ++++-------------
 .../algorithm/cent/DegreeCentralityAlgorithm.java  |  4 +--
 5 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
index 8387a69f9..e77473668 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
@@ -401,10 +401,10 @@ public abstract class AbstractAlgorithm implements Algorithm {
         }
     }
 
-    public static final class TopMap {
+    public static final class TopMap<K> {
 
         private final long topN;
-        private Map<Id, MutableLong> tops;
+        private Map<K, MutableLong> tops;
 
         public TopMap(long topN) {
             this.topN = topN;
@@ -415,11 +415,20 @@ public abstract class AbstractAlgorithm implements Algorithm {
             return this.tops.size();
         }
 
-        public void put(Id key, long value) {
-            this.put(key, Long.valueOf(value));
+        public MutableLong get(K key) {
+            return this.tops.get(key);
         }
 
-        public void put(Id key, Long value) {
+        public void add(K key, long value) {
+            MutableLong mlong = this.tops.get(key);
+            if (mlong == null) {
+                mlong = new MutableLong(value);
+                this.tops.put(key, mlong);
+            }
+            mlong.add(value);
+        }
+
+        public void put(K key, long value) {
             this.tops.put(key, new MutableLong(value));
             // keep 2x buffer
             if (this.tops.size() > this.topN * 2) {
@@ -427,7 +436,7 @@ public abstract class AbstractAlgorithm implements Algorithm {
             }
         }
 
-        public Set<Map.Entry<Id, MutableLong>> entrySet() {
+        public Set<Map.Entry<K, MutableLong>> entrySet() {
             this.shrinkIfNeeded(this.topN);
             return this.tops.entrySet();
         }
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
index 37492e456..c36743176 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
@@ -19,14 +19,20 @@
 
 package com.baidu.hugegraph.job.algorithm.cent;
 
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.tinkerpop.gremlin.process.traversal.Pop;
 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
 import org.apache.tinkerpop.gremlin.structure.Vertex;
 
+import com.baidu.hugegraph.backend.id.Id;
 import com.baidu.hugegraph.job.Job;
 import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm;
+import com.baidu.hugegraph.structure.HugeElement;
 
 public abstract class AbstractCentAlgorithm extends AbstractAlgorithm {
 
@@ -106,5 +112,28 @@ public abstract class AbstractCentAlgorithm extends AbstractAlgorithm {
             }
             return unit;
         }
+
+        protected GraphTraversal<Vertex, Vertex> filterNonShortestPath(
+                                                 GraphTraversal<Vertex, Vertex>
+                                                 t) {
+            long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT)
+                                                    .count().next();
+            Map<Pair<Id, Id>, Integer> triples = new HashMap<>((int) size);
+            return t.filter(it -> {
+                Id start = it.<HugeElement>path(Pop.first, "v").id();
+                Id end = it.<HugeElement>path(Pop.last, "v").id();
+                int len = it.<List<?>>path(Pop.all, "v").size();
+                Pair<Id, Id> key = Pair.of(start, end);
+                Integer shortest = triples.get(key);
+                if (shortest != null && shortest != len) {
+                    // ignore non shortest path
+                    return false;
+                }
+                if (shortest == null) {
+                    triples.put(key, len);
+                }
+                return true;
+            });
+        }
     }
 }
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
index ae1b8bb74..9a72d2f62 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
@@ -73,25 +73,12 @@ public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm {
                                                                sourceCLabel);
             t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
             t = t.emit().until(__.loops().is(P.gte(depth)));
+            t = filterNonShortestPath(t);
 
-            @SuppressWarnings({ "unchecked", "deprecation" })
-            GraphTraversal<Vertex, Vertex> tf = t.filter(
-                    __.project("x","y","z")
-                           .by(__.select(Pop.first, "v").id())
-                           .by(__.select(Pop.last, "v").id())
-                           .by(__.select(Pop.all, "v").count(Scope.local))
-                      .as("triple")
-                      .coalesce(__.select("x","y").as("a")
-                                  .select("triples").unfold().as("t")
-                                  .select("x","y").where(P.eq("a")).select("t"),
-                                __.store("triples"))
-                      .select("z").as("length")
-                      .select("triple").select("z").where(P.eq("length")));
-
-            GraphTraversal<Vertex, ?> tg = tf.select(Pop.all, "v")
-                                             .unfold().id()
-                                             .groupCount().order(Scope.local)
-                                             .by(Column.values, Order.desc);
+            GraphTraversal<Vertex, ?> tg = t.select(Pop.all, "v")
+                                            .unfold().id()
+                                            .groupCount().order(Scope.local)
+                                            .by(Column.values, Order.desc);
             GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
                                                tg.limit(Scope.local, topN);
 
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
index d890db808..96e9709fe 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
@@ -82,26 +82,13 @@ public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm {
                                                                sourceCLabel);
             t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
             t = t.emit().until(__.loops().is(P.gte(depth)));
-
-            @SuppressWarnings({ "unchecked", "deprecation" })
-            GraphTraversal<Vertex, Vertex> tf = t.filter(
-                    __.project("x","y","z")
-                           .by(__.select(Pop.first, "v").id())
-                           .by(__.select(Pop.last, "v").id())
-                           .by(__.select(Pop.all, "v").count(Scope.local))
-                      .as("triple")
-                      .coalesce(__.select("x","y").as("a")
-                                  .select("triples").unfold().as("t")
-                                  .select("x","y").where(P.eq("a")).select("t"),
-                                __.store("triples"))
-                      .select("z").as("length")
-                      .select("triple").select("z").where(P.eq("length")));
+            t = filterNonShortestPath(t);
 
             GraphTraversal<Vertex, ?> tg;
-            tg = tf.group().by(__.select(Pop.first, "v").id())
-                           .by(__.select(Pop.all, "v").count(Scope.local)
-                                 .sack(Operator.div).sack().sum())
-                           .order(Scope.local).by(Column.values, Order.desc);
+            tg = t.group().by(__.select(Pop.first, "v").id())
+                          .by(__.select(Pop.all, "v").count(Scope.local)
+                                .sack(Operator.div).sack().sum())
+                          .order(Scope.local).by(Column.values, Order.desc);
             GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
                                                tg.limit(Scope.local, topN);
 
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
index 81bd33672..5f6781b21 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
@@ -67,7 +67,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm {
             Iterator<Edge> edges = this.edges(direction);
 
             JsonMap degrees = new JsonMap();
-            TopMap tops = new TopMap(topN);
+            TopMap<Id> tops = new TopMap<>(topN);
             Id vertex = null;
             long degree = 0L;
             long total = 0L;
@@ -111,7 +111,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm {
             assert topN >= 0L;
             long total = 0L;
             JsonMap degrees = new JsonMap();
-            TopMap tops = new TopMap(topN);
+            TopMap<Id> tops = new TopMap<>(topN);
 
             GraphTraversalSource traversal = this.graph().traversal();
             Iterator<Vertex> vertices = this.vertices();