You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/08 12:26:25 UTC
[incubator-nlpcraft] 04/05: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 854700cc18384e701de42a2920df9000a6b43db0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 8 15:23:29 2021 +0300
WIP.
---
.../mgrs/sentence/NCComboHelper.java} | 155 +++++++++------------
1 file changed, 65 insertions(+), 90 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
similarity index 52%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
index 834735b..da2d3fd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
@@ -15,21 +15,22 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.common.util;
+package org.apache.nlpcraft.probe.mgrs.sentence;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
-import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;
-import java.util.stream.Collectors;
import static java.util.stream.Collectors.toList;
-public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
+/**
+ *
+ */
+class NCComboHelper extends RecursiveTask<List<Long>> {
private static final long THRESHOLD = (long)Math.pow(2, 20);
private final long lo;
@@ -37,50 +38,44 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
private final long[] wordBits;
private final int[] wordCounts;
- private NCComboRecursiveTask(long lo, long hi, long[] wordBits, int[] wordCounts) {
+ private NCComboHelper(long lo, long hi, long[] wordBits, int[] wordCounts) {
this.lo = lo;
this.hi = hi;
this.wordBits = wordBits;
this.wordCounts = wordCounts;
}
- public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
- List<List<T>> uniqueInp = inp;
-
+ /**
+ *
+ * @param words
+ * @param pool
+ * @param <T>
+ * @return
+ */
+ static <T> List<List<T>> findCombinations(List<Set<T>> words, ForkJoinPool pool) {
// Build dictionary of unique words.
- List<T> dict = uniqueInp.stream()
- .flatMap(Collection::stream)
- .distinct()
- .collect(toList());
-
- System.out.println("inp=" + inp);
- System.out.println("dict=" + dict);
+ List<T> dict = words.stream().flatMap(Collection::stream).distinct().collect(toList());
- if (dict.size() > Long.SIZE) {
+ if (dict.size() > Long.SIZE)
// Note: Power set of 64 words results in 9223372036854775807 combinations.
throw new IllegalArgumentException("Dictionary is too long: " + dict.size());
- }
// Convert words to bitmasks (each bit corresponds to an index in the dictionary).
- long[] wordBits = uniqueInp.stream()
- .sorted(Comparator.comparingInt(List::size))
- .mapToLong(row -> wordsToBits(row, dict))
- .toArray();
+ long[] wordBits =
+ words.stream().sorted(Comparator.comparingInt(Set::size)).mapToLong(row -> wordsToBits(row, dict)).toArray();
// Cache words count per row.
- int[] wordCounts = uniqueInp.stream().sorted(Comparator.comparingInt(List::size)).mapToInt(List::size).toArray();
+ int[] wordCounts = words.stream().sorted(Comparator.comparingInt(Set::size)).mapToInt(Set::size).toArray();
// Prepare Fork/Join task to iterate over the power set of all combinations.
- int lo = 1;
- long hi = (long)Math.pow(2, dict.size());
-
- NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
-
- final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
-
- System.out.println("res=" + res);
-
- return res;
+ return pool.invoke(
+ new NCComboHelper(
+ 1,
+ (long)Math.pow(2, dict.size()),
+ wordBits,
+ wordCounts
+ )
+ ).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
}
@Override
@@ -89,7 +84,7 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
}
private List<Long> computeLocal() {
- List<Long> result = new ArrayList<>();
+ List<Long> res = new ArrayList<>();
for (long comboBits = lo; comboBits < hi; comboBits++) {
boolean match = true;
@@ -98,12 +93,8 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
// from the input row would give us the expected result.
for (int j = 0; j < wordBits.length; j++) {
// Get bitmask of how many words can be subtracted from the row.
- long commonBits = wordBits[j] & comboBits;
-
- int wordsToRemove = Long.bitCount(commonBits);
-
// Check if there is more than 1 word remaining after subtraction.
- if (wordCounts[j] - wordsToRemove > 1) {
+ if (wordCounts[j] - Long.bitCount(wordBits[j] & comboBits) > 1) {
// Skip this combination.
match = false;
@@ -111,19 +102,18 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
}
}
- if (match && !includes(comboBits, result)) {
- result.add(comboBits);
- }
+ if (match && !includes(comboBits, res))
+ res.add(comboBits);
}
- return result;
+ return res;
}
private List<Long> forkJoin() {
long mid = lo + hi >>> 1L;
- NCComboRecursiveTask t1 = new NCComboRecursiveTask(lo, mid, wordBits, wordCounts);
- NCComboRecursiveTask t2 = new NCComboRecursiveTask(mid, hi, wordBits, wordCounts);
+ NCComboHelper t1 = new NCComboHelper(lo, mid, wordBits, wordCounts);
+ NCComboHelper t2 = new NCComboHelper(mid, hi, wordBits, wordCounts);
t2.fork();
@@ -131,63 +121,52 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
}
private List<Long> merge(List<Long> l1, List<Long> l2) {
- if (l1.isEmpty()) {
+ if (l1.isEmpty())
return l2;
- }
- else if (l2.isEmpty()) {
+ else if (l2.isEmpty())
return l1;
- }
int size1 = l1.size();
int size2 = l2.size();
if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
// Minor optimization in case if one of the lists has only one element.
- List<Long> list = size1 == 1 ? l2 : l1;
+ List<Long> res = size1 == 1 ? l2 : l1;
Long val = size1 == 1 ? l1.get(0) : l2.get(0);
- if (!includes(val, list)) {
- list.add(val);
- }
+ if (!includes(val, res))
+ res.add(val);
- return list;
+ return res;
}
- else {
- List<Long> result = new ArrayList<>(size1 + size2);
-
- for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
- Long v1 = i < size1 ? l1.get(i) : null;
- Long v2 = i < size2 ? l2.get(i) : null;
-
- if (v1 != null && v2 != null) {
- if (containsAllBits(v1, v2)) {
- v1 = null;
- }
- else if (containsAllBits(v2, v1)) {
- v2 = null;
- }
- }
- if (v1 != null && !includes(v1, result)) {
- result.add(v1);
- }
+ List<Long> res = new ArrayList<>(size1 + size2);
- if (v2 != null && !includes(v2, result)) {
- result.add(v2);
- }
+ for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+ Long v1 = i < size1 ? l1.get(i) : null;
+ Long v2 = i < size2 ? l2.get(i) : null;
+
+ if (v1 != null && v2 != null) {
+ if (containsAllBits(v1, v2))
+ v1 = null;
+ else if (containsAllBits(v2, v1))
+ v2 = null;
}
- return result;
+ if (v1 != null && !includes(v1, res))
+ res.add(v1);
+
+ if (v2 != null && !includes(v2, res))
+ res.add(v2);
}
+
+ return res;
}
private static boolean includes(long bits, List<Long> allBits) {
- for (int i = 0, size = allBits.size(); i < size; i++) {
- long existing = allBits.get(i);
-
- if (containsAllBits(bits, existing)) {
+ for (int i = 0, n = allBits.size(); i < n; i++) {
+ if (containsAllBits(bits, allBits.get(i)))
return true;
- }
}
return false;
@@ -197,14 +176,12 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
return (bitSet1 & bitSet2) == bitSet2;
}
- private static <T> long wordsToBits(List<T> words, List<T> dict) {
+ private static <T> long wordsToBits(Set<T> words, List<T> dict) {
long bits = 0;
- for (int i = 0; i < dict.size(); i++) {
- if (words.contains(dict.get(i))) {
+ for (int i = 0, n = dict.size(); i < n; i++)
+ if (words.contains(dict.get(i)))
bits |= 1L << i;
- }
- }
return bits;
}
@@ -212,11 +189,9 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
private static <T> List<T> bitsToWords(long bits, List<T> dict) {
List<T> words = new ArrayList<>(Long.bitCount(bits));
- for (int i = 0; i < dict.size(); i++) {
- if ((bits & 1L << i) != 0) {
+ for (int i = 0, n = dict.size(); i < n; i++)
+ if ((bits & 1L << i) != 0)
words.add(dict.get(i));
- }
- }
return words;
}