You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/08 12:26:25 UTC

[incubator-nlpcraft] 04/05: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 854700cc18384e701de42a2920df9000a6b43db0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 8 15:23:29 2021 +0300

    WIP.
---
 .../mgrs/sentence/NCComboHelper.java}              | 155 +++++++++------------
 1 file changed, 65 insertions(+), 90 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
similarity index 52%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
index 834735b..da2d3fd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
@@ -15,21 +15,22 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.common.util;
+package org.apache.nlpcraft.probe.mgrs.sentence;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.RecursiveTask;
-import java.util.stream.Collectors;
 
 import static java.util.stream.Collectors.toList;
 
-public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
+/**
+ *
+ */
+class NCComboHelper extends RecursiveTask<List<Long>> {
     private static final long THRESHOLD = (long)Math.pow(2, 20);
 
     private final long lo;
@@ -37,50 +38,44 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     private final long[] wordBits;
     private final int[] wordCounts;
 
-    private NCComboRecursiveTask(long lo, long hi, long[] wordBits, int[] wordCounts) {
+    private NCComboHelper(long lo, long hi, long[] wordBits, int[] wordCounts) {
         this.lo = lo;
         this.hi = hi;
         this.wordBits = wordBits;
         this.wordCounts = wordCounts;
     }
 
-    public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
-        List<List<T>> uniqueInp = inp;
-
+    /**
+     *
+     * @param words
+     * @param pool
+     * @param <T>
+     * @return
+     */
+    static <T> List<List<T>> findCombinations(List<Set<T>> words, ForkJoinPool pool) {
         // Build dictionary of unique words.
-        List<T> dict = uniqueInp.stream()
-            .flatMap(Collection::stream)
-            .distinct()
-            .collect(toList());
-
-        System.out.println("inp=" + inp);
-        System.out.println("dict=" + dict);
+        List<T> dict = words.stream().flatMap(Collection::stream).distinct().collect(toList());
 
-        if (dict.size() > Long.SIZE) {
+        if (dict.size() > Long.SIZE)
             // Note: Power set of 64 words results in 9223372036854775807 combinations.
             throw new IllegalArgumentException("Dictionary is too long: " + dict.size());
-        }
 
         // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
-        long[] wordBits = uniqueInp.stream()
-            .sorted(Comparator.comparingInt(List::size))
-            .mapToLong(row -> wordsToBits(row, dict))
-            .toArray();
+        long[] wordBits =
+            words.stream().sorted(Comparator.comparingInt(Set::size)).mapToLong(row -> wordsToBits(row, dict)).toArray();
 
         // Cache words count per row.
-        int[] wordCounts = uniqueInp.stream().sorted(Comparator.comparingInt(List::size)).mapToInt(List::size).toArray();
+        int[] wordCounts = words.stream().sorted(Comparator.comparingInt(Set::size)).mapToInt(Set::size).toArray();
 
         // Prepare Fork/Join task to iterate over the power set of all combinations.
-        int lo = 1;
-        long hi = (long)Math.pow(2, dict.size());
-
-        NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
-
-        final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
-
-        System.out.println("res=" + res);
-
-        return res;
+        return pool.invoke(
+            new NCComboHelper(
+                1,
+                (long)Math.pow(2, dict.size()),
+                wordBits,
+                wordCounts
+            )
+        ).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
     }
 
     @Override
@@ -89,7 +84,7 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     }
 
     private List<Long> computeLocal() {
-        List<Long> result = new ArrayList<>();
+        List<Long> res = new ArrayList<>();
 
         for (long comboBits = lo; comboBits < hi; comboBits++) {
             boolean match = true;
@@ -98,12 +93,8 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
             // from the input row would give us the expected result.
             for (int j = 0; j < wordBits.length; j++) {
                 // Get bitmask of how many words can be subtracted from the row.
-                long commonBits = wordBits[j] & comboBits;
-
-                int wordsToRemove = Long.bitCount(commonBits);
-
                 // Check if there is more than 1 word remaining after subtraction.
-                if (wordCounts[j] - wordsToRemove > 1) {
+                if (wordCounts[j] - Long.bitCount(wordBits[j] & comboBits) > 1) {
                     // Skip this combination.
                     match = false;
 
@@ -111,19 +102,18 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
                 }
             }
 
-            if (match && !includes(comboBits, result)) {
-                result.add(comboBits);
-            }
+            if (match && !includes(comboBits, res))
+                res.add(comboBits);
         }
 
-        return result;
+        return res;
     }
 
     private List<Long> forkJoin() {
         long mid = lo + hi >>> 1L;
 
-        NCComboRecursiveTask t1 = new NCComboRecursiveTask(lo, mid, wordBits, wordCounts);
-        NCComboRecursiveTask t2 = new NCComboRecursiveTask(mid, hi, wordBits, wordCounts);
+        NCComboHelper t1 = new NCComboHelper(lo, mid, wordBits, wordCounts);
+        NCComboHelper t2 = new NCComboHelper(mid, hi, wordBits, wordCounts);
 
         t2.fork();
 
@@ -131,63 +121,52 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     }
 
     private List<Long> merge(List<Long> l1, List<Long> l2) {
-        if (l1.isEmpty()) {
+        if (l1.isEmpty())
             return l2;
-        }
-        else if (l2.isEmpty()) {
+        else if (l2.isEmpty())
             return l1;
-        }
 
         int size1 = l1.size();
         int size2 = l2.size();
 
         if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
             // Minor optimization in case if one of the lists has only one element.
-            List<Long> list = size1 == 1 ? l2 : l1;
+            List<Long> res = size1 == 1 ? l2 : l1;
             Long val = size1 == 1 ? l1.get(0) : l2.get(0);
 
-            if (!includes(val, list)) {
-                list.add(val);
-            }
+            if (!includes(val, res))
+                res.add(val);
 
-            return list;
+            return res;
         }
-        else {
-            List<Long> result = new ArrayList<>(size1 + size2);
-
-            for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
-                Long v1 = i < size1 ? l1.get(i) : null;
-                Long v2 = i < size2 ? l2.get(i) : null;
-
-                if (v1 != null && v2 != null) {
-                    if (containsAllBits(v1, v2)) {
-                        v1 = null;
-                    }
-                    else if (containsAllBits(v2, v1)) {
-                        v2 = null;
-                    }
-                }
 
-                if (v1 != null && !includes(v1, result)) {
-                    result.add(v1);
-                }
+        List<Long> res = new ArrayList<>(size1 + size2);
 
-                if (v2 != null && !includes(v2, result)) {
-                    result.add(v2);
-                }
+        for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+            Long v1 = i < size1 ? l1.get(i) : null;
+            Long v2 = i < size2 ? l2.get(i) : null;
+
+            if (v1 != null && v2 != null) {
+                if (containsAllBits(v1, v2))
+                    v1 = null;
+                else if (containsAllBits(v2, v1))
+                    v2 = null;
             }
 
-            return result;
+            if (v1 != null && !includes(v1, res))
+                res.add(v1);
+
+            if (v2 != null && !includes(v2, res))
+                res.add(v2);
         }
+
+        return res;
     }
 
     private static boolean includes(long bits, List<Long> allBits) {
-        for (int i = 0, size = allBits.size(); i < size; i++) {
-            long existing = allBits.get(i);
-
-            if (containsAllBits(bits, existing)) {
+        for (int i = 0, n = allBits.size(); i < n; i++) {
+            if (containsAllBits(bits, allBits.get(i)))
                 return true;
-            }
         }
 
         return false;
@@ -197,14 +176,12 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
         return (bitSet1 & bitSet2) == bitSet2;
     }
 
-    private static <T> long wordsToBits(List<T> words, List<T> dict) {
+    private static <T> long wordsToBits(Set<T> words, List<T> dict) {
         long bits = 0;
 
-        for (int i = 0; i < dict.size(); i++) {
-            if (words.contains(dict.get(i))) {
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if (words.contains(dict.get(i)))
                 bits |= 1L << i;
-            }
-        }
 
         return bits;
     }
@@ -212,11 +189,9 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     private static <T> List<T> bitsToWords(long bits, List<T> dict) {
         List<T> words = new ArrayList<>(Long.bitCount(bits));
 
-        for (int i = 0; i < dict.size(); i++) {
-            if ((bits & 1L << i) != 0) {
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if ((bits & 1L << i) != 0)
                 words.add(dict.get(i));
-            }
-        }
 
         return words;
     }