You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hugegraph.apache.org by ji...@apache.org on 2022/11/09 10:25:08 UTC
[incubator-hugegraph] 05/33: optimize filterNonShortestPath() for betweeness_centrality (#12)
This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph.git
commit 7a72d7df60963dac3e0a33d30bf7758abf2830ba
Author: Jermy Li <li...@baidu.com>
AuthorDate: Wed Apr 15 11:03:59 2020 +0800
optimize filterNonShortestPath() for betweeness_centrality (#12)
network10000 dataset test:
before after
(depth=4 sample=1) 395s 25s
(depth=3 sample=2) 4300s 35s
same as the closeness_centrality
Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df
---
.../hugegraph/job/algorithm/AbstractAlgorithm.java | 21 +++++++++++-----
.../job/algorithm/cent/AbstractCentAlgorithm.java | 29 ++++++++++++++++++++++
.../cent/BetweenessCentralityAlgorithm.java | 23 ++++-------------
.../cent/ClosenessCentralityAlgorithm.java | 23 ++++-------------
.../algorithm/cent/DegreeCentralityAlgorithm.java | 4 +--
5 files changed, 56 insertions(+), 44 deletions(-)
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
index 8387a69f9..e77473668 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
@@ -401,10 +401,10 @@ public abstract class AbstractAlgorithm implements Algorithm {
}
}
- public static final class TopMap {
+ public static final class TopMap<K> {
private final long topN;
- private Map<Id, MutableLong> tops;
+ private Map<K, MutableLong> tops;
public TopMap(long topN) {
this.topN = topN;
@@ -415,11 +415,20 @@ public abstract class AbstractAlgorithm implements Algorithm {
return this.tops.size();
}
- public void put(Id key, long value) {
- this.put(key, Long.valueOf(value));
+ public MutableLong get(K key) {
+ return this.tops.get(key);
}
- public void put(Id key, Long value) {
+ public void add(K key, long value) {
+ MutableLong mlong = this.tops.get(key);
+ if (mlong == null) {
+ mlong = new MutableLong(value);
+ this.tops.put(key, mlong);
+ }
+ mlong.add(value);
+ }
+
+ public void put(K key, long value) {
this.tops.put(key, new MutableLong(value));
// keep 2x buffer
if (this.tops.size() > this.topN * 2) {
@@ -427,7 +436,7 @@ public abstract class AbstractAlgorithm implements Algorithm {
}
}
- public Set<Map.Entry<Id, MutableLong>> entrySet() {
+ public Set<Map.Entry<K, MutableLong>> entrySet() {
this.shrinkIfNeeded(this.topN);
return this.tops.entrySet();
}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
index 37492e456..c36743176 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
@@ -19,14 +19,20 @@
package com.baidu.hugegraph.job.algorithm.cent;
+import java.util.HashMap;
+import java.util.List;
import java.util.Map;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.tinkerpop.gremlin.process.traversal.Pop;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Vertex;
+import com.baidu.hugegraph.backend.id.Id;
import com.baidu.hugegraph.job.Job;
import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm;
+import com.baidu.hugegraph.structure.HugeElement;
public abstract class AbstractCentAlgorithm extends AbstractAlgorithm {
@@ -106,5 +112,28 @@ public abstract class AbstractCentAlgorithm extends AbstractAlgorithm {
}
return unit;
}
+
+ protected GraphTraversal<Vertex, Vertex> filterNonShortestPath(
+ GraphTraversal<Vertex, Vertex>
+ t) {
+ long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT)
+ .count().next();
+ Map<Pair<Id, Id>, Integer> triples = new HashMap<>((int) size);
+ return t.filter(it -> {
+ Id start = it.<HugeElement>path(Pop.first, "v").id();
+ Id end = it.<HugeElement>path(Pop.last, "v").id();
+ int len = it.<List<?>>path(Pop.all, "v").size();
+ Pair<Id, Id> key = Pair.of(start, end);
+ Integer shortest = triples.get(key);
+ if (shortest != null && shortest != len) {
+ // ignore non shortest path
+ return false;
+ }
+ if (shortest == null) {
+ triples.put(key, len);
+ }
+ return true;
+ });
+ }
}
}
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
index ae1b8bb74..9a72d2f62 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
@@ -73,25 +73,12 @@ public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm {
sourceCLabel);
t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
t = t.emit().until(__.loops().is(P.gte(depth)));
+ t = filterNonShortestPath(t);
- @SuppressWarnings({ "unchecked", "deprecation" })
- GraphTraversal<Vertex, Vertex> tf = t.filter(
- __.project("x","y","z")
- .by(__.select(Pop.first, "v").id())
- .by(__.select(Pop.last, "v").id())
- .by(__.select(Pop.all, "v").count(Scope.local))
- .as("triple")
- .coalesce(__.select("x","y").as("a")
- .select("triples").unfold().as("t")
- .select("x","y").where(P.eq("a")).select("t"),
- __.store("triples"))
- .select("z").as("length")
- .select("triple").select("z").where(P.eq("length")));
-
- GraphTraversal<Vertex, ?> tg = tf.select(Pop.all, "v")
- .unfold().id()
- .groupCount().order(Scope.local)
- .by(Column.values, Order.desc);
+ GraphTraversal<Vertex, ?> tg = t.select(Pop.all, "v")
+ .unfold().id()
+ .groupCount().order(Scope.local)
+ .by(Column.values, Order.desc);
GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
tg.limit(Scope.local, topN);
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
index d890db808..96e9709fe 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
@@ -82,26 +82,13 @@ public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm {
sourceCLabel);
t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
t = t.emit().until(__.loops().is(P.gte(depth)));
-
- @SuppressWarnings({ "unchecked", "deprecation" })
- GraphTraversal<Vertex, Vertex> tf = t.filter(
- __.project("x","y","z")
- .by(__.select(Pop.first, "v").id())
- .by(__.select(Pop.last, "v").id())
- .by(__.select(Pop.all, "v").count(Scope.local))
- .as("triple")
- .coalesce(__.select("x","y").as("a")
- .select("triples").unfold().as("t")
- .select("x","y").where(P.eq("a")).select("t"),
- __.store("triples"))
- .select("z").as("length")
- .select("triple").select("z").where(P.eq("length")));
+ t = filterNonShortestPath(t);
GraphTraversal<Vertex, ?> tg;
- tg = tf.group().by(__.select(Pop.first, "v").id())
- .by(__.select(Pop.all, "v").count(Scope.local)
- .sack(Operator.div).sack().sum())
- .order(Scope.local).by(Column.values, Order.desc);
+ tg = t.group().by(__.select(Pop.first, "v").id())
+ .by(__.select(Pop.all, "v").count(Scope.local)
+ .sack(Operator.div).sack().sum())
+ .order(Scope.local).by(Column.values, Order.desc);
GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
tg.limit(Scope.local, topN);
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
index 81bd33672..5f6781b21 100644
--- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
+++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
@@ -67,7 +67,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm {
Iterator<Edge> edges = this.edges(direction);
JsonMap degrees = new JsonMap();
- TopMap tops = new TopMap(topN);
+ TopMap<Id> tops = new TopMap<>(topN);
Id vertex = null;
long degree = 0L;
long total = 0L;
@@ -111,7 +111,7 @@ public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm {
assert topN >= 0L;
long total = 0L;
JsonMap degrees = new JsonMap();
- TopMap tops = new TopMap(topN);
+ TopMap<Id> tops = new TopMap<>(topN);
GraphTraversalSource traversal = this.graph().traversal();
Iterator<Vertex> vertices = this.vertices();