Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/08 12:26:21 UTC

[incubator-nlpcraft] branch NLPCRAFT-261 updated (b7ab66f -> 8a3fcbd)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from b7ab66f  WIP.
     new 46a636b  WIP.
     new 3b5a380  WIP.
     new 7815090  WIP.
     new 854700c  WIP.
     add 60bedc3  Parts detection fixed.
     add c999e3c  Fix for NLPCRAFT-262.
     new 8a3fcbd  Merge branch 'master' into NLPCRAFT-261

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 735 +--------------------
 .../nlpcraft/model/tools/cmdline/NCCli.scala       |  17 +-
 .../org/apache/nlpcraft/probe/NCProbeBoot.scala    |   2 +
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |   8 +-
 .../probe/mgrs/sentence/NCComboHelper.java         | 198 ++++++
 .../mgrs/sentence/NCSentenceManager.scala}         | 275 ++------
 .../model/NCEnricherNestedModelSpec.scala          |  18 +-
 .../model/NCEnricherNestedModelSpec4.scala         |   2 +-
 .../probe/mgrs/nlp/enrichers/model/Test1.java      | 135 ----
 .../probe/mgrs/nlp/enrichers/model/Test2.java      | 114 ----
 11 files changed, 288 insertions(+), 1219 deletions(-)
 create mode 100644 nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
 copy nlpcraft/src/main/scala/org/apache/nlpcraft/{common/nlp/NCNlpSentence.scala => probe/mgrs/sentence/NCSentenceManager.scala} (75%)
 delete mode 100644 nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java
 delete mode 100644 nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java


[incubator-nlpcraft] 01/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 46a636be94bd9de66eb66cc3cd510db8dc12a16b
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sun Mar 7 14:43:53 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 176 +++++++------
 .../org/apache/nlpcraft/common/nlp/SyTest.java     | 241 ++++++++++++++++++
 .../nlpcraft/common/util/NCComboRecursiveTask.java | 230 +++++++++++++++++
 .../nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java  | 278 +++++++++++++++++++++
 4 files changed, 850 insertions(+), 75 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index f530327..9d9cb98 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -20,11 +20,13 @@ package org.apache.nlpcraft.common.nlp
 import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.common.NCE
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
+import org.apache.nlpcraft.common.util.NCComboRecursiveTask
 import org.apache.nlpcraft.model.NCModel
 
-import java.io.{Serializable ⇒ JSerializable}
+import java.io.{Serializable => JSerializable}
 import java.util
-import java.util.{Collections, List ⇒ JList}
+import java.util.concurrent.ForkJoinPool
+import java.util.{Collections, Comparator, List => JList}
 import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, Set, mutable}
@@ -41,7 +43,6 @@ object NCNlpSentence extends LazyLogging {
         require(start <= end)
 
         private def in(i: Int): Boolean = i >= start && i <= end
-
         def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
     }
 
@@ -72,7 +73,7 @@ object NCNlpSentence extends LazyLogging {
                 noteLinks ++=
                     (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
                         yield NoteLink(name, idxs.sorted)
-                        )
+                    )
             }
 
             if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
@@ -410,8 +411,7 @@ object NCNlpSentence extends LazyLogging {
             "stopWord" → stop,
             "bracketed" → false,
             "direct" → direct,
-            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict")
-            else false),
+            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
             "english" → nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
             "swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
         )
@@ -458,8 +458,7 @@ object NCNlpSentence extends LazyLogging {
                     var fixed = idxs
 
                     history.foreach {
-                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew
-                        else i).distinct)
+                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct)
                     }
 
                     if (fixed.forall(_.size == 1))
@@ -522,9 +521,9 @@ object NCNlpSentence extends LazyLogging {
 
         val res =
             fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, history) &&
-                fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
-                fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
-                fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
+            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
+            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
+            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
 
         if (res) {
             // Validation (all indexes calculated well)
@@ -590,8 +589,7 @@ object NCNlpSentence extends LazyLogging {
             if (lastPhase)
                 dropAbstract(mdl, ns)
 
-            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns)
-            else None
+            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns) else None
         }
 
         // Always deletes `similar` notes.
@@ -599,27 +597,27 @@ object NCNlpSentence extends LazyLogging {
         // We keep only one variant -  with `best` direct and sparsity parameters,
         // other variants for these words are redundant.
         val redundant: Seq[NCNlpSentenceNote] =
-        thisSen.flatten.filter(!_.isNlp).distinct.
-            groupBy(_.getKey()).
-            map(p ⇒ p._2.sortBy(p ⇒
-                (
-                    // System notes don't have such flags.
-                    if (p.isUser) {
-                        if (p.isDirect)
-                            0
+            thisSen.flatten.filter(!_.isNlp).distinct.
+                groupBy(_.getKey()).
+                map(p ⇒ p._2.sortBy(p ⇒
+                    (
+                        // System notes don't have such flags.
+                        if (p.isUser) {
+                            if (p.isDirect)
+                                0
+                            else
+                                1
+                        }
                         else
-                            1
-                    }
-                    else
-                        0,
-                    if (p.isUser)
-                        p.sparsity
-                    else
-                        0
-                )
-            )).
-            flatMap(_.drop(1)).
-            toSeq
+                            0,
+                        if (p.isUser)
+                            p.sparsity
+                        else
+                            0
+                    )
+                )).
+                flatMap(_.drop(1)).
+                toSeq
 
         redundant.foreach(thisSen.removeNote)
 
@@ -642,62 +640,91 @@ object NCNlpSentence extends LazyLogging {
                     val key = PartKey(note, thisSen)
 
                     val delCombOthers =
-                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n)
-                        else None)
+                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
 
-                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note)
-                    else None
+                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
                 })
 
         delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
         addDeleted(thisSen, thisSen, swallowed)
         swallowed.foreach(thisSen.removeNote)
 
-        val toksByIdx: Seq[Set[NCNlpSentenceNote]] =
+        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
             delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                 groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
+                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
                 toSeq.sortBy(-_.size)
 
-        val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
+//        val toksByIdx1 =
+//            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
+//                groupBy { case (idx, _) ⇒ idx }.
+//                map { case (idx, seq) ⇒ idx → seq.map { case (_, note) ⇒ note } }.
+//                toSeq.sortBy(_._2.size)
+//
+//        toksByIdx.foreach{ case (seq) ⇒
+//            println(s"toksByIdx seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
+//        }
+
+//        toksByIdx1.sortBy(_._1).foreach{ case (i, seq) ⇒
+//            println(s"toksByIdx1 ${i} seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
+//        }
+
+        val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
+
+        var i = 'A'
+
+        val converted: Seq[Seq[String]] =
+            toksByIdx.map(seq ⇒ {
+                seq.map(
+                    n ⇒ {
+                        val s = s"$i"
+
+                        i = (i.toInt + 1).toChar
+
+                        dict += s → n
+
+                        s
+                    }
+                )
+            })
+
+        //val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
 
         var sens =
             if (delCombs.nonEmpty) {
-                val deleted = mutable.ArrayBuffer.empty[Set[NCNlpSentenceNote]]
+                val p = new ForkJoinPool()
+
+                val tmp = NCComboRecursiveTask.findCombinations(
+                    converted.map(_.asJava).asJava,
+                    new Comparator[String]() {
+                        override def compare(n1: String, n2: String): Int = n1.compareTo(n2)
+                    },
+                    p
+                )
+
+                p.shutdown()
+
+                val seq1 = tmp.asScala.map(_.asScala.map(dict))
 
                 val sens =
-                    (minDelSize to delCombs.size).
-                        flatMap(i ⇒
-                            delCombs.combinations(i).
-                                filter(delComb ⇒
-                                    !toksByIdx.exists(
-                                        rec ⇒
-                                            rec.size - delCombs.size <= 1 &&
-                                                rec.count(note ⇒ !delComb.contains(note)) > 1
-                                    )
-                                )
-                        ).
-                        sortBy(_.size).
-                        map(_.toSet).
-                        flatMap(delComb ⇒
-                            // Already processed with less subset of same deleted tokens.
-                            if (!deleted.exists(_.subsetOf(delComb))) {
-                                val nsClone = thisSen.clone()
-
-                                // Saves deleted notes for sentence and their tokens.
-                                addDeleted(thisSen, nsClone, delComb)
-                                delComb.foreach(nsClone.removeNote)
-
-                                // Has overlapped notes for some tokens.
-                                require(!nsClone.exists(_.count(!_.isNlp) > 1))
-
-                                deleted += delComb
-
-                                collapse0(nsClone)
-                            }
-                            else
-                                None
-                        )
+                    seq1.
+                        flatMap(p ⇒ {
+                            val delComb: Seq[NCNlpSentenceNote] = p
+
+                            val nsClone = thisSen.clone()
+
+                            // Saves deleted notes for sentence and their tokens.
+                            addDeleted(thisSen, nsClone, delComb)
+                            delComb.foreach(nsClone.removeNote)
+
+                            // Has overlapped notes for some tokens.
+                            require(
+                                !nsClone.exists(_.count(!_.isNlp) > 1),
+                                s"Invalid notes: ${nsClone.filter(_.count(!_.isNlp) > 1).mkString("|")}"
+                            )
+
+                            collapse0(nsClone)
+                        })
 
                 // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                 // `Direct` sentences have higher priority.
@@ -719,8 +746,7 @@ object NCNlpSentence extends LazyLogging {
                             p.clone().filter(_._1 != "direct")
                         )
 
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0
-                    else 1).sum)
+                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
                 }).
                     foreach { case (key, sen, directCnt) ⇒
                         m.get(key) match {
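
(For context: the hunk above replaces the brute-force delCombs.combinations(i) enumeration with a fork/join bitmask search. Each note occurrence is keyed by a single-character label, NCComboRecursiveTask.findCombinations computes the deletable label combinations, and the labels are mapped back to notes through the dict map. Below is a minimal, self-contained Java sketch of a call to that entry point; the demo class name and sample data are illustrative, not from the patch.)

    import java.util.Comparator;
    import java.util.List;
    import java.util.concurrent.ForkJoinPool;

    import org.apache.nlpcraft.common.util.NCComboRecursiveTask;

    import static java.util.Arrays.asList;

    public class FindCombinationsDemo {
        public static void main(String[] args) {
            // Each inner list holds the note labels overlapping one token index.
            List<List<String>> byTokenIndex = asList(
                asList("A", "B"),
                asList("B", "C")
            );

            ForkJoinPool pool = new ForkJoinPool();

            // Finds the label sets whose removal leaves at most one note per
            // token index; here that is [B] (and the alternative [A, C]).
            List<List<String>> combos = NCComboRecursiveTask.findCombinations(
                byTokenIndex,
                Comparator.naturalOrder(),
                pool
            );

            pool.shutdown();

            combos.forEach(System.out::println);
        }
    }
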
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java
new file mode 100644
index 0000000..db5f657
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java
@@ -0,0 +1,241 @@
+package org.apache.nlpcraft.common.nlp;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.RecursiveTask;
+import java.util.stream.IntStream;
+
+import static java.util.stream.Collectors.toList;
+import static java.util.Arrays.asList;
+import static java.util.stream.Collectors.toUnmodifiableList;
+
+public class SyTest {
+    public static class ComboSearch extends RecursiveTask<List<Long>> {
+        private static final long THRESHOLD = (long)Math.pow(2, 20);
+
+        private final long lo;
+
+        private final long hi;
+
+        private final long[] wordBits;
+
+        private final int[] wordCounts;
+
+        public ComboSearch(
+            long lo,
+            long hi,
+            long[] words,
+            int[] wordCounts
+        ) {
+            this.lo = lo;
+            this.hi = hi;
+            this.wordBits = words;
+            this.wordCounts = wordCounts;
+        }
+
+        public static <T> List<List<T>> findCombos(List<List<T>> inp, ForkJoinPool pool) {
+            List<List<T>> uniqueInp = inp.stream()
+                .filter(row -> inp.stream().noneMatch(it -> it != row && it.containsAll(row)))
+                .map(i -> i.stream().distinct().sorted().collect(toList()))
+                .collect(toList());
+
+            // Build dictionary of unique words.
+            List<T> dict = uniqueInp.stream()
+                .flatMap(Collection::stream)
+                .distinct()
+                .sorted()
+                .collect(toList());
+
+            System.out.println("uniqueInp="+uniqueInp.size());
+            System.out.println("dict="+dict.size());
+
+            if (dict.size() > Long.SIZE) {
+                // Note: Power set of 64 words results in 2^64 combinations, which overflows a signed long.
+                throw new IllegalArgumentException("Cannot handle more than " + Long.SIZE + " unique words in the dictionary.");
+            }
+
+            // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
+            long[] wordBits = uniqueInp.stream()
+                .sorted(Comparator.comparingInt(List::size))
+                .mapToLong(row -> wordsToBits(row, dict))
+                .toArray();
+
+            // Cache words count per row.
+            int[] wordCounts = uniqueInp.stream()
+                .sorted(Comparator.comparingInt(List::size))
+                .mapToInt(List::size)
+                .toArray();
+
+            // Prepare Fork/Join task to iterate over the power set of all combinations.
+            int lo = 1;
+            long hi = (long)Math.pow(2, dict.size());
+
+            ComboSearch task = new ComboSearch(
+                lo,
+                hi,
+                wordBits,
+                wordCounts
+            );
+
+            return pool.invoke(task).stream()
+                .map(bits -> bitsToWords(bits, dict))
+                .collect(toList());
+        }
+
+        @Override
+        protected List<Long> compute() {
+            if (hi - lo <= THRESHOLD) {
+                return computeLocal();
+            } else {
+                return forkJoin();
+            }
+        }
+
+        private List<Long> computeLocal() {
+            List<Long> result = new ArrayList<>();
+
+            for (long comboBits = lo; comboBits < hi; comboBits++) {
+                boolean match = true;
+
+                // For each input row we check if subtracting the current combination of words
+                // from the input row would give us the expected result.
+                for (int j = 0; j < wordBits.length; j++) {
+                    // Get bitmask of how many words can be subtracted from the row.
+                    long commonBits = wordBits[j] & comboBits;
+
+                    int wordsToRemove = Long.bitCount(commonBits);
+
+                    // Check if there is more than 1 word remaining after subtraction.
+                    if (wordCounts[j] - wordsToRemove > 1) {
+                        // Skip this combination.
+                        match = false;
+
+                        break;
+                    }
+                }
+
+                if (match && !includes(comboBits, result)) {
+                    result.add(comboBits);
+                }
+            }
+
+            return result;
+        }
+
+        private List<Long> forkJoin() {
+            long mid = lo + hi >>> 1L;
+
+            ComboSearch t1 = new ComboSearch(lo, mid, wordBits, wordCounts);
+            ComboSearch t2 = new ComboSearch(mid, hi, wordBits, wordCounts);
+
+            t2.fork();
+
+            return merge(t1.compute(), t2.join());
+        }
+
+        private List<Long> merge(List<Long> l1, List<Long> l2) {
+            if (l1.isEmpty()) {
+                return l2;
+            } else if (l2.isEmpty()) {
+                return l1;
+            }
+
+            int size1 = l1.size();
+            int size2 = l2.size();
+
+            if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
+                // Minor optimization in case one of the lists has only one element.
+                List<Long> list = size1 == 1 ? l2 : l1;
+                Long val = size1 == 1 ? l1.get(0) : l2.get(0);
+
+                if (!includes(val, list)) {
+                    list.add(val);
+                }
+
+                return list;
+            } else {
+                List<Long> result = new ArrayList<>(size1 + size2);
+
+                for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+                    Long v1 = i < size1 ? l1.get(i) : null;
+                    Long v2 = i < size2 ? l2.get(i) : null;
+
+                    if (v1 != null && v2 != null) {
+                        if (containsAllBits(v1, v2)) {
+                            v1 = null;
+                        } else if (containsAllBits(v2, v1)) {
+                            v2 = null;
+                        }
+                    }
+
+                    if (v1 != null && !includes(v1, result)) {
+                        result.add(v1);
+                    }
+
+                    if (v2 != null && !includes(v2, result)) {
+                        result.add(v2);
+                    }
+                }
+
+                return result;
+            }
+        }
+
+        private static boolean includes(long bits, List<Long> allBits) {
+            for (long existing : allBits) {
+                if (containsAllBits(bits, existing)) {
+                    return true;
+                }
+            }
+
+            return false;
+        }
+
+        private static boolean containsAllBits(long bitSet1, long bitSet2) {
+            return (bitSet1 & bitSet2) == bitSet2;
+        }
+
+        private static <T> long wordsToBits(List<T> words, List<T> dict) {
+            long bits = 0;
+
+            for (int i = 0; i < dict.size(); i++) {
+                if (words.contains(dict.get(i))) {
+                    bits |= 1L << i;
+                }
+            }
+
+            return bits;
+        }
+
+        private static <T> List<T> bitsToWords(long bits, List<T> dict) {
+            List<T> words = new ArrayList<>(Long.bitCount(bits));
+
+            for (int i = 0; i < dict.size(); i++) {
+                if ((bits & 1L << i) != 0) {
+                    words.add(dict.get(i));
+                }
+            }
+
+            return words;
+        }
+    }
+
+    public static void main(String[] args) throws InterruptedException {
+        List<List<String>> words = IntStream.range(0, 35)
+            .mapToObj(i -> IntStream.range(0, i + 1).mapToObj(String::valueOf).collect(toUnmodifiableList()))
+            .collect(toUnmodifiableList());
+
+        long t = System.currentTimeMillis();
+
+        ForkJoinPool forkJoinPool = new ForkJoinPool();
+        final List<List<String>> combos = ComboSearch.findCombos(words, forkJoinPool);
+
+        System.out.println("size=" + combos.size());
+        System.out.println("time=" + (System.currentTimeMillis() - t));
+    }
+}
\ No newline at end of file
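
(The core of ComboSearch above is plain bit arithmetic: every row becomes a bitmask over the dictionary, and a candidate combination is accepted when subtracting its bits leaves at most one word in every row. A tiny, self-contained sketch of just that check; the class name and sample rows are illustrative, not from the patch.)

    public class BitmaskCheckDemo {
        public static void main(String[] args) {
            // Dictionary order: A = bit 0, B = bit 1, C = bit 2, D = bit 3.
            long[] wordBits = { 0b0111L, 0b1110L }; // Rows {A, B, C} and {B, C, D}.
            int[] wordCounts = { 3, 3 };

            long comboBits = 0b0110L; // Candidate combination {B, C}.

            boolean match = true;

            for (int j = 0; j < wordBits.length; j++) {
                // Words of the candidate combination that occur in row j.
                int wordsToRemove = Long.bitCount(wordBits[j] & comboBits);

                // Reject if more than one word would remain in the row.
                if (wordCounts[j] - wordsToRemove > 1) {
                    match = false;

                    break;
                }
            }

            // Prints 'true': removing {B, C} leaves {A} and {D}, one word per row.
            System.out.println(match);
        }
    }
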
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
new file mode 100644
index 0000000..017c10e
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.common.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.RecursiveTask;
+import java.util.stream.Collectors;
+
+import static java.util.stream.Collectors.toList;
+
+public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
+    private static final long THRESHOLD = (long)Math.pow(2, 20);
+
+    private final long lo;
+    private final long hi;
+    private final long[] wordBits;
+    private final int[] wordCounts;
+
+    private NCComboRecursiveTask(long lo, long hi, long[] wordBits, int[] wordCounts) {
+        this.lo = lo;
+        this.hi = hi;
+        this.wordBits = wordBits;
+        this.wordCounts = wordCounts;
+    }
+
+    public static <T> List<List<T>> findCombinations(List<List<T>> inp, Comparator<T> comparator, ForkJoinPool pool) {
+        List<List<T>> uniqueInp = inp.stream()
+            .filter(row -> inp.stream().noneMatch(it -> !it.equals(row)  && it.containsAll(row)))
+            .map(i -> i.stream().distinct().sorted(comparator).collect(toList()))
+            .collect(toList());
+
+
+        System.out.println("!!!");
+        for (List<T> ts : uniqueInp) {
+            System.out.println("!!!ts=");
+            System.out.println(ts.stream().map(Object::toString).collect(Collectors.joining("\n")));
+        }
+        System.out.println("!!!");
+
+        // Build dictionary of unique words.
+        List<T> dict = uniqueInp.stream()
+            .flatMap(Collection::stream)
+            .distinct()
+            .sorted(comparator)
+            .collect(toList());
+
+        System.out.println("dict=");
+        System.out.println(dict.stream().map(Object::toString).collect(Collectors.joining("\n")));
+        System.out.println();
+
+        if (dict.size() > Long.SIZE) {
+            // Note: Power set of 64 words results in 2^64 combinations, which overflows a signed long.
+            throw new IllegalArgumentException("Dictionary is too long: " + dict.size());
+        }
+
+        // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
+        long[] wordBits = uniqueInp.stream()
+            .sorted(Comparator.comparingInt(List::size))
+            .mapToLong(row -> wordsToBits(row, dict))
+            .toArray();
+
+        // Cache words count per row.
+        int[] wordCounts = uniqueInp.stream().sorted(Comparator.comparingInt(List::size)).mapToInt(List::size).toArray();
+
+        // Prepare Fork/Join task to iterate over the power set of all combinations.
+        int lo = 1;
+        long hi = (long)Math.pow(2, dict.size());
+
+        NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
+
+        return pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+    }
+
+    @Override
+    protected List<Long> compute() {
+        return hi - lo <= THRESHOLD ? computeLocal() : forkJoin();
+    }
+
+    private List<Long> computeLocal() {
+        List<Long> result = new ArrayList<>();
+
+        for (long comboBits = lo; comboBits < hi; comboBits++) {
+            boolean match = true;
+
+            // For each input row we check if subtracting the current combination of words
+            // from the input row would give us the expected result.
+            for (int j = 0; j < wordBits.length; j++) {
+                // Get bitmask of how many words can be subtracted from the row.
+                long commonBits = wordBits[j] & comboBits;
+
+                int wordsToRemove = Long.bitCount(commonBits);
+
+                // Check if there is more than 1 word remaining after subtraction.
+                if (wordCounts[j] - wordsToRemove > 1) {
+                    // Skip this combination.
+                    match = false;
+
+                    break;
+                }
+            }
+
+            if (match && !includes(comboBits, result)) {
+                result.add(comboBits);
+            }
+        }
+
+        return result;
+    }
+
+    private List<Long> forkJoin() {
+        long mid = lo + hi >>> 1L;
+
+        NCComboRecursiveTask t1 = new NCComboRecursiveTask(lo, mid, wordBits, wordCounts);
+        NCComboRecursiveTask t2 = new NCComboRecursiveTask(mid, hi, wordBits, wordCounts);
+
+        t2.fork();
+
+        return merge(t1.compute(), t2.join());
+    }
+
+    private List<Long> merge(List<Long> l1, List<Long> l2) {
+        if (l1.isEmpty()) {
+            return l2;
+        }
+        else if (l2.isEmpty()) {
+            return l1;
+        }
+
+        int size1 = l1.size();
+        int size2 = l2.size();
+
+        if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
+            // Minor optimization in case one of the lists has only one element.
+            List<Long> list = size1 == 1 ? l2 : l1;
+            Long val = size1 == 1 ? l1.get(0) : l2.get(0);
+
+            if (!includes(val, list)) {
+                list.add(val);
+            }
+
+            return list;
+        }
+        else {
+            List<Long> result = new ArrayList<>(size1 + size2);
+
+            for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+                Long v1 = i < size1 ? l1.get(i) : null;
+                Long v2 = i < size2 ? l2.get(i) : null;
+
+                if (v1 != null && v2 != null) {
+                    if (containsAllBits(v1, v2)) {
+                        v1 = null;
+                    }
+                    else if (containsAllBits(v2, v1)) {
+                        v2 = null;
+                    }
+                }
+
+                if (v1 != null && !includes(v1, result)) {
+                    result.add(v1);
+                }
+
+                if (v2 != null && !includes(v2, result)) {
+                    result.add(v2);
+                }
+            }
+
+            return result;
+        }
+    }
+
+    private static boolean includes(long bits, List<Long> allBits) {
+        for (int i = 0, size = allBits.size(); i < size; i++) {
+            long existing = allBits.get(i);
+
+            if (containsAllBits(bits, existing)) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    private static boolean containsAllBits(long bitSet1, long bitSet2) {
+        return (bitSet1 & bitSet2) == bitSet2;
+    }
+
+    private static <T> long wordsToBits(List<T> words, List<T> dict) {
+        long bits = 0;
+
+        for (int i = 0; i < dict.size(); i++) {
+            if (words.contains(dict.get(i))) {
+                bits |= 1L << i;
+            }
+        }
+
+        return bits;
+    }
+
+    private static <T> List<T> bitsToWords(long bits, List<T> dict) {
+        List<T> words = new ArrayList<>(Long.bitCount(bits));
+
+        for (int i = 0; i < dict.size(); i++) {
+            if ((bits & 1L << i) != 0) {
+                words.add(dict.get(i));
+            }
+        }
+
+        return words;
+    }
+}
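
(NCComboRecursiveTask above follows the standard divide-and-conquer RecursiveTask shape: split the [lo, hi) range at the midpoint until it falls under THRESHOLD, solve locally, fork one half and compute the other, then merge. A minimal, self-contained sketch of the same pattern on a trivial problem, range summation, purely for illustration.)

    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.RecursiveTask;

    public class RangeSum extends RecursiveTask<Long> {
        private static final long THRESHOLD = 1L << 20;

        private final long lo;
        private final long hi;

        public RangeSum(long lo, long hi) {
            this.lo = lo;
            this.hi = hi;
        }

        @Override
        protected Long compute() {
            if (hi - lo <= THRESHOLD) {
                // Small enough: solve sequentially.
                long sum = 0;

                for (long i = lo; i < hi; i++)
                    sum += i;

                return sum;
            }

            long mid = lo + hi >>> 1; // Same midpoint trick as above.

            RangeSum t1 = new RangeSum(lo, mid);
            RangeSum t2 = new RangeSum(mid, hi);

            t2.fork(); // Run the upper half asynchronously.

            return t1.compute() + t2.join();
        }

        public static void main(String[] args) {
            System.out.println(new ForkJoinPool().invoke(new RangeSum(0, 1L << 24)));
        }
    }
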
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java
new file mode 100644
index 0000000..69d9ab4
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java
@@ -0,0 +1,278 @@
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.NavigableSet;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import static java.util.Arrays.asList;
+import static java.util.stream.Collectors.toList;
+import static java.util.stream.Collectors.toSet;
+
+public class SyTest {
+    public static void main(String[] args) {
+//        List<List<String>> words = asList(
+//            asList("A", "B", "C"),
+//            asList("B", "C", "D"),
+//            asList("B", "D")
+//        );
+        List<List<String>> words = asList(
+            asList("A", "B"),
+            asList("C", "B"),
+            asList("D", "E"),
+            asList("D", "F"),
+            asList("G", "H"),
+            asList("I", "H"),
+            asList("J", "K"),
+            asList("L", "K"),
+            asList("M", "N"),
+            asList("M", "O"),
+            asList("P", "Q"),
+            asList("P", "R"),
+            asList("S", "T"),
+            asList("S", "U"),
+            asList("V", "W"),
+            asList("X", "W")
+            ,
+            asList("Y", "Z"),
+            asList("A1", "A2"),
+            asList("A3", "A3"),
+            asList("A4", "A5", "A6")
+        );
+
+        System.out.println(
+            "Dictionary size:"
+                + words.stream()
+                .flatMap(Collection::stream)
+                .distinct()
+                .count()
+        );
+
+        System.out.println("===== Performance =====");
+
+        for (int i = 0; i < 1; i++) {
+            long t = System.currentTimeMillis();
+
+            Set<Set<String>> combos = findCombos(words);
+
+
+
+            System.out.println("Iteration " + i + " Time: " + (System.currentTimeMillis() - t) + ", resCnt=" + combos.size());
+        }
+
+        if (true) {
+            return;
+        }
+
+        Set<Set<String>> combos = findCombos(words);
+
+        System.out.println();
+        System.out.println("===== Result =====");
+        System.out.println("Total combos: " + combos.size());
+        System.out.println();
+//        combos.stream()
+//            .sorted(Comparator.comparing(Collection::size))
+//            .forEach(combo ->
+//                print(words, combo)
+//            );
+    }
+
+    public static <T extends Comparable<T>> Set<Set<T>> findCombos(List<List<T>> inp) {
+
+
+        List<List<T>> uniqueInp = inp.stream()
+            .filter(row -> inp.stream().noneMatch(it -> it != row && it.containsAll(row)))
+            .map(i -> i.stream().distinct().sorted().collect(toList()))
+            .collect(toList());
+
+        // Build dictionary of unique words.
+        List<T> dict = uniqueInp.stream()
+            .flatMap(Collection::stream)
+            .distinct()
+            .sorted()
+            .collect(toList());
+
+        if (dict.size() > Integer.SIZE) {
+            // Note: Power set of 32 words results in 4294967296 combinations.
+            throw new IllegalArgumentException("Cannot handle more than " + Integer.SIZE + " unique words in the dictionary.");
+        }
+
+        // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
+        int[] wordBits = uniqueInp.stream()
+            .sorted(Comparator.comparingInt(List::size))
+            .mapToInt(row -> wordsToBits(row, dict))
+            .toArray();
+
+        // Cache words count per row.
+        int[] wordCounts = uniqueInp.stream()
+            .sorted(Comparator.comparingInt(List::size))
+            .mapToInt(List::size)
+            .toArray();
+
+        int min = 1;
+        int max = (int)Math.pow(2, dict.size()) - 1;
+
+        int batchFactor = 100;
+        int threads = 13;
+
+        ExecutorService pool = Executors.newFixedThreadPool(threads);
+        CountDownLatch cdl = new CountDownLatch(batchFactor);
+
+        int divRes = max / batchFactor;
+        int divRest = max % batchFactor;
+
+        int to = 0;
+
+        List<Integer> result = new CopyOnWriteArrayList<>();
+
+        for (int k = 0; k < batchFactor; k++) {
+            to += divRes;
+
+            if (k == divRes - 1) {
+                to += divRest;
+            }
+
+            int toFinal = to;
+            int fromFinal = min + k * divRes;
+
+            pool.execute(
+                () -> {
+                    List<Integer> locRes = new ArrayList<>();
+
+                    for (int comboBits = fromFinal; comboBits < toFinal; comboBits++) {
+                        boolean match = true;
+
+                        // For each input row we check if subtracting the current combination of words
+                        // from the input row would give us the expected result.
+                        for (int j = 0; j < wordBits.length; j++) {
+                            // Get bitmask of how many words can be subtracted from the row.
+                            int commonBits = wordBits[j] & comboBits;
+
+                            int wordsToRemove = Integer.bitCount(commonBits);
+
+                            // Check if there is more than 1 word remaining after subtraction.
+                            if (wordCounts[j] - wordsToRemove > 1) {
+                                // Skip this combination.
+                                match = false;
+
+                                break;
+                            }
+                        }
+
+                        if (match && !includes(comboBits, locRes)) {
+                            locRes.add(comboBits);
+                        }
+                    }
+
+                    result.addAll(locRes);
+
+                    cdl.countDown();
+                }
+            );
+        }
+
+// Iterate over the power set.
+
+        //pool.shutdown();
+        try {
+            cdl.await(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
+            //pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException e) {
+            e.printStackTrace();
+        }
+
+        // Convert found results from bitmasks back to words.
+        TreeSet<Set<T>> treeSet = new TreeSet<>(Comparator.comparingInt(Set::size));
+
+        treeSet.addAll(result.stream().map(bits -> bitsToWords(bits, dict)).collect(toSet()));
+
+        Set<Set<T>> normCombs = new HashSet<>();
+
+        for (Set<T> set : treeSet) {
+            boolean b = true;
+
+            for (Set<T> added : normCombs) {
+                if (added.containsAll(set)) {
+                    b = false;
+
+                    break;
+                }
+            }
+
+            if (b) {
+                normCombs.add(set);
+            }
+        }
+
+        return normCombs;
+
+    }
+
+    private static <T> Set<Set<T>> squeeze(Set<Set<T>> combs) {
+        Set<Set<T>> normCombs = new HashSet<>();
+
+        combs.stream().sorted(Comparator.comparingInt(Set::size)).forEach(comb -> {
+            // Skips already added shorter variants.
+            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
+                normCombs.add(comb);
+            }
+        });
+
+        return normCombs;
+    }
+
+
+    private static boolean includes(int bits, List<Integer> allBits) {
+        for (int existing : allBits) {
+            if ((bits & existing) == existing) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    private static <T> int wordsToBits(List<T> words, List<T> dict) {
+        int bits = 0;
+
+        for (int i = 0; i < dict.size(); i++) {
+            if (words.contains(dict.get(i))) {
+                bits |= 1 << i;
+            }
+        }
+
+        return bits;
+    }
+
+    private static <T> Set<T> bitsToWords(int bits, List<T> dict) {
+        Set<T> words = new HashSet<>(Integer.bitCount(bits));
+
+        for (int i = 0; i < dict.size(); i++) {
+            if ((bits & 1 << i) != 0) {
+                words.add(dict.get(i));
+            }
+        }
+
+        return words;
+    }
+
+    private static void print(List<List<String>> inp, List<String> combo) {
+        System.out.println("==== " + combo + "(" + combo.size() + ')');
+        inp.stream().forEach(row -> {
+            Set<String> s = new TreeSet<>(row);
+            s.removeAll(combo);
+            System.out.println(s);
+        });
+    }
+}
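
(The normalization at the end of findCombos above, duplicated by the unused squeeze helper, keeps only minimal combinations: candidates are visited from smallest to largest and any candidate that is a superset of one already kept is dropped. A small, self-contained sketch of that step; names and sample data are illustrative, not from the patch.)

    import java.util.Comparator;
    import java.util.HashSet;
    import java.util.Set;

    public class SqueezeDemo {
        public static void main(String[] args) {
            Set<Set<String>> combs = Set.of(
                Set.of("B", "C"),
                Set.of("A", "B", "C"), // Superset of {B, C} -> dropped.
                Set.of("A", "D")
            );

            Set<Set<String>> normCombs = new HashSet<>();

            combs.stream()
                .sorted(Comparator.comparingInt(Set::size))
                .forEach(comb -> {
                    // Skip any combination that contains an already kept one.
                    if (normCombs.stream().noneMatch(comb::containsAll))
                        normCombs.add(comb);
                });

            System.out.println(normCombs); // [[B, C], [A, D]] (order may vary).
        }
    }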


[incubator-nlpcraft] 05/05: Merge branch 'master' into NLPCRAFT-261

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 8a3fcbde28a123274ad2b7d78ebcd892eeadbdae
Merge: 854700c c999e3c
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 8 15:25:22 2021 +0300

    Merge branch 'master' into NLPCRAFT-261
    
    # Conflicts:
    #	nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala

 .../org/apache/nlpcraft/model/tools/cmdline/NCCli.scala | 17 ++++++++++-------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala      |  1 +
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --cc nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 82edf69,2a9dec0..13b7d45
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@@ -24,8 -24,7 +24,9 @@@ import org.apache.nlpcraft.model.
  import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
  import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
  import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 +import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 +import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
+ import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
  
  import java.io.Serializable
  import java.util


[incubator-nlpcraft] 04/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 854700cc18384e701de42a2920df9000a6b43db0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 8 15:23:29 2021 +0300

    WIP.
---
 .../mgrs/sentence/NCComboHelper.java}              | 155 +++++++++------------
 1 file changed, 65 insertions(+), 90 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
similarity index 52%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
index 834735b..da2d3fd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCComboHelper.java
@@ -15,21 +15,22 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.common.util;
+package org.apache.nlpcraft.probe.mgrs.sentence;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.RecursiveTask;
-import java.util.stream.Collectors;
 
 import static java.util.stream.Collectors.toList;
 
-public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
+/**
+ *
+ */
+class NCComboHelper extends RecursiveTask<List<Long>> {
     private static final long THRESHOLD = (long)Math.pow(2, 20);
 
     private final long lo;
@@ -37,50 +38,44 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     private final long[] wordBits;
     private final int[] wordCounts;
 
-    private NCComboRecursiveTask(long lo, long hi, long[] wordBits, int[] wordCounts) {
+    private NCComboHelper(long lo, long hi, long[] wordBits, int[] wordCounts) {
         this.lo = lo;
         this.hi = hi;
         this.wordBits = wordBits;
         this.wordCounts = wordCounts;
     }
 
-    public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
-        List<List<T>> uniqueInp = inp;
-
+    /**
+     *
+     * @param words
+     * @param pool
+     * @param <T>
+     * @return
+     */
+    static <T> List<List<T>> findCombinations(List<Set<T>> words, ForkJoinPool pool) {
         // Build dictionary of unique words.
-        List<T> dict = uniqueInp.stream()
-            .flatMap(Collection::stream)
-            .distinct()
-            .collect(toList());
-
-        System.out.println("inp=" + inp);
-        System.out.println("dict=" + dict);
+        List<T> dict = words.stream().flatMap(Collection::stream).distinct().collect(toList());
 
-        if (dict.size() > Long.SIZE) {
+        if (dict.size() > Long.SIZE)
             // Note: Power set of 64 words results in 2^64 combinations, which overflows a signed long.
             throw new IllegalArgumentException("Dictionary is too long: " + dict.size());
-        }
 
         // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
-        long[] wordBits = uniqueInp.stream()
-            .sorted(Comparator.comparingInt(List::size))
-            .mapToLong(row -> wordsToBits(row, dict))
-            .toArray();
+        long[] wordBits =
+            words.stream().sorted(Comparator.comparingInt(Set::size)).mapToLong(row -> wordsToBits(row, dict)).toArray();
 
         // Cache words count per row.
-        int[] wordCounts = uniqueInp.stream().sorted(Comparator.comparingInt(List::size)).mapToInt(List::size).toArray();
+        int[] wordCounts = words.stream().sorted(Comparator.comparingInt(Set::size)).mapToInt(Set::size).toArray();
 
         // Prepare Fork/Join task to iterate over the power set of all combinations.
-        int lo = 1;
-        long hi = (long)Math.pow(2, dict.size());
-
-        NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
-
-        final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
-
-        System.out.println("res=" + res);
-
-        return res;
+        return pool.invoke(
+            new NCComboHelper(
+                1,
+                (long)Math.pow(2, dict.size()),
+                wordBits,
+                wordCounts
+            )
+        ).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
     }
 
     @Override
@@ -89,7 +84,7 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     }
 
     private List<Long> computeLocal() {
-        List<Long> result = new ArrayList<>();
+        List<Long> res = new ArrayList<>();
 
         for (long comboBits = lo; comboBits < hi; comboBits++) {
             boolean match = true;
@@ -98,12 +93,8 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
             // from the input row would give us the expected result.
             for (int j = 0; j < wordBits.length; j++) {
                 // Get bitmask of how many words can be subtracted from the row.
-                long commonBits = wordBits[j] & comboBits;
-
-                int wordsToRemove = Long.bitCount(commonBits);
-
                 // Check if there is more than 1 word remaining after subtraction.
-                if (wordCounts[j] - wordsToRemove > 1) {
+                if (wordCounts[j] - Long.bitCount(wordBits[j] & comboBits) > 1) {
                     // Skip this combination.
                     match = false;
 
@@ -111,19 +102,18 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
                 }
             }
 
-            if (match && !includes(comboBits, result)) {
-                result.add(comboBits);
-            }
+            if (match && !includes(comboBits, res))
+                res.add(comboBits);
         }
 
-        return result;
+        return res;
     }
 
     private List<Long> forkJoin() {
         long mid = lo + hi >>> 1L;
 
-        NCComboRecursiveTask t1 = new NCComboRecursiveTask(lo, mid, wordBits, wordCounts);
-        NCComboRecursiveTask t2 = new NCComboRecursiveTask(mid, hi, wordBits, wordCounts);
+        NCComboHelper t1 = new NCComboHelper(lo, mid, wordBits, wordCounts);
+        NCComboHelper t2 = new NCComboHelper(mid, hi, wordBits, wordCounts);
 
         t2.fork();
 
@@ -131,63 +121,52 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     }
 
     private List<Long> merge(List<Long> l1, List<Long> l2) {
-        if (l1.isEmpty()) {
+        if (l1.isEmpty())
             return l2;
-        }
-        else if (l2.isEmpty()) {
+        else if (l2.isEmpty())
             return l1;
-        }
 
         int size1 = l1.size();
         int size2 = l2.size();
 
         if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
             // Minor optimization in case one of the lists has only one element.
-            List<Long> list = size1 == 1 ? l2 : l1;
+            List<Long> res = size1 == 1 ? l2 : l1;
             Long val = size1 == 1 ? l1.get(0) : l2.get(0);
 
-            if (!includes(val, list)) {
-                list.add(val);
-            }
+            if (!includes(val, res))
+                res.add(val);
 
-            return list;
+            return res;
         }
-        else {
-            List<Long> result = new ArrayList<>(size1 + size2);
-
-            for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
-                Long v1 = i < size1 ? l1.get(i) : null;
-                Long v2 = i < size2 ? l2.get(i) : null;
-
-                if (v1 != null && v2 != null) {
-                    if (containsAllBits(v1, v2)) {
-                        v1 = null;
-                    }
-                    else if (containsAllBits(v2, v1)) {
-                        v2 = null;
-                    }
-                }
 
-                if (v1 != null && !includes(v1, result)) {
-                    result.add(v1);
-                }
+        List<Long> res = new ArrayList<>(size1 + size2);
 
-                if (v2 != null && !includes(v2, result)) {
-                    result.add(v2);
-                }
+        for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+            Long v1 = i < size1 ? l1.get(i) : null;
+            Long v2 = i < size2 ? l2.get(i) : null;
+
+            if (v1 != null && v2 != null) {
+                if (containsAllBits(v1, v2))
+                    v1 = null;
+                else if (containsAllBits(v2, v1))
+                    v2 = null;
             }
 
-            return result;
+            if (v1 != null && !includes(v1, res))
+                res.add(v1);
+
+            if (v2 != null && !includes(v2, res))
+                res.add(v2);
         }
+
+        return res;
     }
 
     private static boolean includes(long bits, List<Long> allBits) {
-        for (int i = 0, size = allBits.size(); i < size; i++) {
-            long existing = allBits.get(i);
-
-            if (containsAllBits(bits, existing)) {
+        for (int i = 0, n = allBits.size(); i < n; i++) {
+            if (containsAllBits(bits, allBits.get(i)))
                 return true;
-            }
         }
 
         return false;
@@ -197,14 +176,12 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
         return (bitSet1 & bitSet2) == bitSet2;
     }
 
-    private static <T> long wordsToBits(List<T> words, List<T> dict) {
+    private static <T> long wordsToBits(Set<T> words, List<T> dict) {
         long bits = 0;
 
-        for (int i = 0; i < dict.size(); i++) {
-            if (words.contains(dict.get(i))) {
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if (words.contains(dict.get(i)))
                 bits |= 1L << i;
-            }
-        }
 
         return bits;
     }
@@ -212,11 +189,9 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
     private static <T> List<T> bitsToWords(long bits, List<T> dict) {
         List<T> words = new ArrayList<>(Long.bitCount(bits));
 
-        for (int i = 0; i < dict.size(); i++) {
-            if ((bits & 1L << i) != 0) {
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if ((bits & 1L << i) != 0)
                 words.add(dict.get(i));
-            }
-        }
 
         return words;
     }
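
(After this cleanup the entry point takes List<Set<T>> and no comparator, and the class itself is package-private. A minimal, hypothetical driver, which would have to live in org.apache.nlpcraft.probe.mgrs.sentence to see the class; names and sample data are illustrative, not from the patch.)

    package org.apache.nlpcraft.probe.mgrs.sentence;

    import java.util.List;
    import java.util.Set;
    import java.util.concurrent.ForkJoinPool;

    public class NCComboHelperDemo {
        public static void main(String[] args) {
            // The notes overlapping each token index, now passed as sets.
            List<Set<String>> words = List.of(
                Set.of("A", "B"),
                Set.of("B", "C")
            );

            ForkJoinPool pool = new ForkJoinPool();

            // The label sets whose removal leaves at most one note per token
            // index, e.g. [B] and the alternative [A, C].
            List<List<String>> combos = NCComboHelper.findCombinations(words, pool);

            pool.shutdown();

            combos.forEach(System.out::println);
        }
    }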


[incubator-nlpcraft] 03/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 7815090de3aa042a4839793cdb65cd1677afbb86
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 8 15:23:17 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 776 +--------------------
 .../org/apache/nlpcraft/common/nlp/SyTest.java     | 241 -------
 .../org/apache/nlpcraft/probe/NCProbeBoot.scala    |   2 +
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |   7 +-
 .../mgrs/sentence/NCSentenceManager.scala}         | 380 +++-------
 .../nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java  | 278 --------
 .../model/NCEnricherNestedModelSpec4.scala         |   2 +-
 .../probe/mgrs/nlp/enrichers/model/Test1.java      | 135 ----
 .../probe/mgrs/nlp/enrichers/model/Test2.java      | 114 ---
 10 files changed, 108 insertions(+), 1830 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 95a98a3..11d515a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -18,778 +18,17 @@
 package org.apache.nlpcraft.common.nlp
 
 import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common.NCE
-import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
-import org.apache.nlpcraft.common.util.NCComboRecursiveTask
-import org.apache.nlpcraft.model.NCModel
-
-import java.io.{Serializable => JSerializable}
-import java.util
-import java.util.concurrent.ForkJoinPool
-import java.util.{Collections, Comparator, List => JList}
+
+import java.io.{Serializable ⇒ JSerializable}
+import java.util.{Collections, List ⇒ JList}
 import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, Set, mutable}
 import scala.language.implicitConversions
 
 object NCNlpSentence extends LazyLogging {
-    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
-
     case class NoteKey(start: Int, end: Int)
     case class TokenKey(id: String, start: Int, end: Int)
     case class NoteLink(note: String, indexes: Seq[Int])
-
-    case class PartKey(id: String, start: Int, end: Int) {
-        require(start <= end)
-
-        private def in(i: Int): Boolean = i >= start && i <= end
-        def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
-    }
-
-    object PartKey {
-        def apply(m: util.HashMap[String, JSerializable]): PartKey = {
-            def get[T](name: String): T = m.get(name).asInstanceOf[T]
-
-            PartKey(get("id"), get("startcharindex"), get("endcharindex"))
-        }
-
-        def apply(t: NCNlpSentenceNote, sen: NCNlpSentence): PartKey =
-            PartKey(t.noteType, sen(t.tokenFrom).startCharIndex, sen(t.tokenTo).endCharIndex)
-    }
-
-    private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
-        val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
-
-        for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
-            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.sorted)
-
-        for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
-            def add(noteName: String, idxsName: String): Unit = {
-                val names = n(noteName).asInstanceOf[JList[String]]
-                val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
-
-                require(names.size() == idxsSeq.size())
-
-                noteLinks ++=
-                    (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
-                        yield NoteLink(name, idxs.sorted)
-                    )
-            }
-
-            if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
-            if (n.contains("bynotes")) add("bynotes", "byindexes")
-        }
-
-        noteLinks
-    }
-
-    private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
-        notes.
-            filter(_.isUser).
-            flatMap(n ⇒ {
-                val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
-
-                optList
-            }).flatMap(_.asScala).map(m ⇒ PartKey(m)).distinct
-
-    /**
-      *
-      * @param ns
-      * @param idxs
-      * @param notesType
-      * @param note
-      * @return
-      */
-    private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = {
-        val types = idxs.flatMap(idx ⇒ ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)).distinct
-
-        /**
-          * Example:
-          * 1. Sentence 'maximum x' (single-element related function):
-          * - maximum is an aggregate function linked to the date element.
-          * - x is defined as 2 elements: date and num.
-          * So, the variant 'maximum x (as num)' should be excluded.
-          * *
-          * 2. Sentence 'compare x and y' (multiple-elements related function):
-          * - compare is a relation function linked to the date element.
-          * - x and y are defined as 2 elements: date and num.
-          * So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
-          * should not be excluded, but the invalid relation should be deleted for these combinations.
-          */
-        types.size match {
-            case 0 ⇒ false
-            case 1 ⇒ types.head == notesType
-            case _ ⇒
-                // Equal elements should be processed together with function element.
-                if (types.size == 1)
-                    false
-                else {
-                    ns.removeNote(note)
-
-                    logger.trace(s"Removed note: $note")
-
-                    true
-                }
-        }
-    }
-
-    /**
-      * Fixes notes with references to other notes' indexes.
-      * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kinds of references.
-      *
-      * @param noteType Note type.
-      * @param idxsField Indexes field.
-      * @param noteField Note field.
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @return Valid flag.
-      */
-    private def fixIndexesReferences(
-        noteType: String,
-        idxsField: String,
-        noteField: String,
-        ns: NCNlpSentence,
-        history: Seq[(Int, Int)]
-    ): Boolean = {
-        ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
-            tok.getNoteOpt(noteType, idxsField) match {
-                case Some(n) ⇒
-                    val idxs: Seq[Int] = n.data[JList[Int]](idxsField).asScala
-                    var fixed = idxs
-
-                    history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }
-
-                    fixed = fixed.distinct
-
-                    if (idxs != fixed)
-                        ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[JSerializable])
-                case None ⇒ // No-op.
-            }
-        )
-
-        ns.flatMap(_.getNotes(noteType)).forall(
-            n ⇒ checkRelation(ns, n.data[JList[Int]]("indexes").asScala, n.data[String](noteField), n)
-        )
-    }
-
-    /**
-      *
-      * @param note
-      * @param idxsField
-      * @param noteField
-      * @param ns
-      */
-    private def fixNoteIndexes(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit =
-        ns.flatMap(_.getNotes(note)).foreach(
-            n ⇒ checkRelation(ns, n.data[JList[Int]](idxsField).asScala, n.data[String](noteField), n)
-        )
-
-    /**
-      *
-      * @param note
-      * @param idxsField
-      * @param noteField
-      * @param ns
-      */
-    private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = {
-        ns.flatMap(_.getNotes(note)).foreach(rel ⇒
-            rel.dataOpt[JList[JList[Int]]](idxsField) match {
-                case Some(idxsList) ⇒
-                    val notesTypes = rel.data[JList[String]](noteField)
-
-                    require(idxsList.size() == notesTypes.size())
-
-                    idxsList.asScala.zip(notesTypes.asScala).foreach {
-                        case (idxs, notesType) ⇒ checkRelation(ns, idxs.asScala, notesType, rel)
-                    }
-                case None ⇒ // No-op.
-            }
-        )
-    }
-
-    /**
-      * Copies token.
-      *
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @param toksCopy Copied tokens.
-      * @param i Index.
-      */
-    private def simpleCopy(
-        ns: NCNlpSentence,
-        history: mutable.ArrayBuffer[(Int, Int)],
-        toksCopy: NCNlpSentence, i: Int
-    ): Seq[NCNlpSentenceToken] = {
-        val tokCopy = toksCopy(i)
-
-        history += tokCopy.index → ns.size
-
-        ns += tokCopy.clone(ns.size)
-    }
-
-    /**
-      * Glues stop words.
-      *
-      * @param ns Sentence.
-      * @param userNoteTypes Notes types.
-      * @param history Indexes transformation history.
-      */
-    private def unionStops(
-        ns: NCNlpSentence,
-        userNoteTypes: Seq[String],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): Unit = {
-        // A Java collection is used here because the scala collection (mutable.Buffer.empty[mutable.Buffer[Token]])
-        // causes compilation errors which look like a scala compiler internal error.
-        val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]()
-
-        def last[T](l: JList[T]): T = l.get(l.size() - 1)
-
-        ns.filter(t ⇒ t.isStopWord && !t.isBracketed).foreach(t ⇒
-            if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index)
-                last(bufs) += t
-            else
-                bufs.add(mutable.Buffer.empty[NCNlpSentenceToken] :+ t)
-        )
-
-        val idxsSeq = bufs.asScala.filter(_.lengthCompare(1) > 0).map(_.map(_.index))
-
-        if (idxsSeq.nonEmpty) {
-            val nsCopyToks = ns.clone()
-            ns.clear()
-
-            val buf = mutable.Buffer.empty[Int]
-
-            for (i ← nsCopyToks.indices)
-                idxsSeq.find(_.contains(i)) match {
-                    case Some(idxs) ⇒
-                        if (!buf.contains(idxs.head)) {
-                            buf += idxs.head
-
-                            ns += mkCompound(ns, nsCopyToks, idxs, stop = true, ns.size, None, history)
-                        }
-                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
-                }
-
-            fixIndexes(ns, userNoteTypes)
-        }
-    }
-
-    /**
-      * Fixes indexes for all notes after recreating tokens.
-      *
-      * @param ns Sentence.
-      * @param userNoteTypes Notes types.
-      */
-    private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]) {
-        // Replaces other notes indexes.
-        for (t ← userNoteTypes :+ "nlpcraft:nlp"; note ← ns.getNotes(t)) {
-            val toks = ns.filter(_.contains(note)).sortBy(_.index)
-
-            val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).sorted)
-
-            toks.foreach(t ⇒ {
-                t.remove(note)
-                t.add(newNote)
-            })
-        }
-
-        // Special case - field index of core NLP note.
-        ns.zipWithIndex.foreach { case (tok, idx) ⇒ ns.fixNote(tok.getNlpNote, "index" → idx) }
-    }
-
-    /**
-      * Zip notes with same type.
-      *
-      * @param ns Sentence.
-      * @param nType Notes type.
-      * @param userNotesTypes Notes types.
-      * @param history Indexes transformation history.
-      */
-    private def zipNotes(
-        ns: NCNlpSentence,
-        nType: String,
-        userNotesTypes: Seq[String],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): Unit = {
-        val nts = ns.getNotes(nType).filter(n ⇒ n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom)
-
-        val overlapped =
-            nts.flatMap(n ⇒ n.tokenFrom to n.tokenTo).map(ns(_)).exists(
-                t ⇒ userNotesTypes.map(pt ⇒ t.getNotes(pt).size).sum > 1
-            )
-
-        if (nts.nonEmpty && !overlapped) {
-            val nsCopyToks = ns.clone()
-            ns.clear()
-
-            val buf = mutable.ArrayBuffer.empty[Int]
-
-            for (i ← nsCopyToks.indices)
-                nts.find(_.tokenIndexes.contains(i)) match {
-                    case Some(n) ⇒
-                        if (!buf.contains(n.tokenFrom)) {
-                            buf += n.tokenFrom
-
-                            ns += mkCompound(ns, nsCopyToks, n.tokenIndexes, stop = false, ns.size, Some(n), history)
-                        }
-                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
-                }
-
-            fixIndexes(ns, userNotesTypes)
-        }
-    }
-
-    /**
-      * Makes compound note.
-      *
-      * @param ns Sentence.
-      * @param nsCopyToks Tokens.
-      * @param indexes Indexes.
-      * @param stop Flag.
-      * @param idx Index.
-      * @param commonNote Common note.
-      * @param history Indexes transformation history.
-      */
-    private def mkCompound(
-        ns: NCNlpSentence,
-        nsCopyToks: Seq[NCNlpSentenceToken],
-        indexes: Seq[Int],
-        stop: Boolean,
-        idx: Int,
-        commonNote: Option[NCNlpSentenceNote],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): NCNlpSentenceToken = {
-        val t = NCNlpSentenceToken(idx)
-
-        // Note, it adds stop-words too.
-        val content = nsCopyToks.zipWithIndex.filter(p ⇒ indexes.contains(p._2)).map(_._1)
-
-        content.foreach(t ⇒ history += t.index → idx)
-
-        def mkValue(get: NCNlpSentenceToken ⇒ String): String = {
-            val buf = mutable.Buffer.empty[String]
-
-            val n = content.size - 1
-
-            content.zipWithIndex.foreach(p ⇒ {
-                val t = p._1
-                val idx = p._2
-
-                buf += get(t)
-
-                if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
-                    buf += " "
-            })
-
-            buf.mkString
-        }
-
-        val origText = mkValue((t: NCNlpSentenceToken) ⇒ t.origText)
-
-        val idxs = Seq(idx)
-        val wordIdxs = content.flatMap(_.wordIndexes).sorted
-
-        val direct =
-            commonNote match {
-                case Some(n) if n.isUser ⇒ n.isDirect
-                case _ ⇒ content.forall(_.isDirect)
-            }
-
-        val params = Seq(
-            "index" → idx,
-            "pos" → NCPennTreebank.SYNTH_POS,
-            "posDesc" → NCPennTreebank.SYNTH_POS_DESC,
-            "lemma" → mkValue((t: NCNlpSentenceToken) ⇒ t.lemma),
-            "origText" → origText,
-            "normText" → mkValue((t: NCNlpSentenceToken) ⇒ t.normText),
-            "stem" → mkValue((t: NCNlpSentenceToken) ⇒ t.stem),
-            "start" → content.head.startCharIndex,
-            "end" → content.last.endCharIndex,
-            "charLength" → origText.length,
-            "quoted" → false,
-            "stopWord" → stop,
-            "bracketed" → false,
-            "direct" → direct,
-            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
-            "english" → nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
-            "swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
-        )
-
-        val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*)
-
-        t.add(nlpNote)
-
-        // Adds processed note with fixed indexes.
-        commonNote match {
-            case Some(n) ⇒
-                ns.removeNote(n)
-                t.add(n.clone(idxs, wordIdxs))
-            case None ⇒ // No-op.
-        }
-
-        t
-    }
-
-    /**
-      * Fixes notes whose reference lists point to other notes' indexes.
-      *
-      * @param noteType Note type.
-      * @param idxsField Indexes field.
-      * @param noteField Note field.
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @return Valid flag.
-      */
-    private def fixIndexesReferencesList(
-        noteType: String,
-        idxsField: String,
-        noteField: String,
-        ns: NCNlpSentence,
-        history: Seq[(Int, Int)]
-    ): Boolean = {
-        var ok = true
-
-        for (tok ← ns.filter(_.isTypeOf(noteType)) if ok)
-            tok.getNoteOpt(noteType, idxsField) match {
-                case Some(n) ⇒
-                    val idxs: Seq[Seq[Int]] =
-                        n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala)
-                    var fixed = idxs
-
-                    history.foreach {
-                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct)
-                    }
-
-                    if (fixed.forall(_.size == 1))
-                        // Flattens the two-dimensional array to one dimension,
-                        // so it should always be called regardless of the 'fixIndexesReferences' method.
-                        ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[JSerializable])
-                    else
-                        ok = false
-                case None ⇒ // No-op.
-            }
-
-        ok &&
-            ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
-                rel.dataOpt[JList[Int]](idxsField) match {
-                    case Some(idxsList) ⇒
-                        val notesTypes = rel.data[JList[String]](noteField)
-
-                        require(idxsList.size() == notesTypes.size())
-
-                        idxsList.asScala.zip(notesTypes.asScala).forall {
-                            case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel)
-                        }
-                    case None ⇒ true
-                }
-            )
-    }
-
-    /**
-      * Fixes tokens positions.
-      *
-      * @param ns Sentence.
-      * @param notNlpTypes Token types.
-      */
-    private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
-        ns.
-            filter(!_.isNlp).
-            filter(_.isStopWord).
-            flatten.
-            filter(_.isNlp).
-            foreach(n ⇒ ns.fixNote(n, "stopWord" → false))
-
-        val all = ns.tokens.flatten
-        val nsNotes: Map[String, Seq[Int]] = all.map(p ⇒ p.noteType → p.tokenIndexes).toMap
-
-        for (
-            t ← ns.tokens; stopReason ← t.stopsReasons
-                if all.contains(stopReason) && nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
-        )
-            ns.fixNote(t.getNlpNote, "stopWord" → true)
-
-        val history = mutable.ArrayBuffer.empty[(Int, Int)]
-
-        fixNoteIndexes("nlpcraft:relation", "indexes", "note", ns)
-        fixNoteIndexes("nlpcraft:limit", "indexes", "note", ns)
-        fixNoteIndexesList("nlpcraft:sort", "subjindexes", "subjnotes", ns)
-        fixNoteIndexesList("nlpcraft:sort", "byindexes", "bynotes", ns)
-
-        notNlpTypes.foreach(typ ⇒ zipNotes(ns, typ, notNlpTypes, history))
-        unionStops(ns, notNlpTypes, history)
-
-        val res =
-            fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, history) &&
-            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
-
-        if (res) {
-            // Validation (all indexes calculated well)
-            require(
-                !res ||
-                    !ns.flatten.
-                        exists(n ⇒ ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t ⇒ !t.contains(n))),
-                s"Invalid sentence:\n" +
-                    ns.map(t ⇒
-                        // Human readable invalid sentence for debugging.
-                        s"${t.origText}{index:${t.index}}[${t.map(n ⇒ s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]"
-                    ).mkString("\n")
-            )
-        }
-
-        res
-    }
-
-    private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
-        if (!mdl.getAbstractTokens.isEmpty) {
-            val notes = ns.flatten
-
-            val keys = getPartKeys(notes: _*)
-            val noteLinks = getLinks(notes)
-
-            notes.filter(n ⇒ {
-                val noteToks = ns.tokens.filter(_.contains(n))
-
-                mdl.getAbstractTokens.contains(n.noteType) &&
-                    !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
-                    !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
-            }).foreach(ns.removeNote)
-        }
-
-    private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
-        toks.flatten.filter(!_.isNlp).distinct
-
-    private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
-        sen.deletedNotes ++= dels.map(n ⇒ {
-            val savedDelNote = n.clone()
-            val savedDelToks = n.tokenIndexes.map(idx ⇒ thisSen(idx).clone())
-
-            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
-            // Deleted note's tokens should contain only nlp data and deleted notes.
-            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
-                savedDelTok.remove(mainNote)
-
-            savedDelNote → savedDelToks
-        })
-
-    /**
-      * This collapser handles several tasks:
-      * - "overall" collapsing after all other individual collapsers had their turn.
-      * - Special further enrichment of tokens like linking, etc.
-      *
-      * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
-      * lengths - the winning note is chosen based on this priority.
-      */
-    @throws[NCE]
-    private def collapseSentence(thisSen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
-        def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
-            if (lastPhase)
-                dropAbstract(mdl, ns)
-
-            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns) else None
-        }
-
-        // Always deletes `similar` notes.
-        // Some words with the same note type can be detected in various ways.
-        // We keep only one variant - the one with the `best` direct and sparsity parameters,
-        // other variants for these words are redundant.
-        val redundant: Seq[NCNlpSentenceNote] =
-            thisSen.flatten.filter(!_.isNlp).distinct.
-                groupBy(_.getKey()).
-                map(p ⇒ p._2.sortBy(p ⇒
-                    (
-                        // System notes don't have such flags.
-                        if (p.isUser) {
-                            if (p.isDirect)
-                                0
-                            else
-                                1
-                        }
-                        else
-                            0,
-                        if (p.isUser)
-                            p.sparsity
-                        else
-                            0
-                    )
-                )).
-                flatMap(_.drop(1)).
-                toSeq
-
-        redundant.foreach(thisSen.removeNote)
-
-        var delCombs: Seq[NCNlpSentenceNote] =
-            getNotNlpNotes(thisSen).
-                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
-                distinct
-
-        println("delCombs="+delCombs.mkString("\n"))
-
-        // Optimization. Deletes all wholly swallowed notes.
-        val links = getLinks(thisSen.flatten)
-
-        val swallowed =
-            delCombs.
-                // Nothing links to it.
-                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
-                // It has no parts.
-                filter(getPartKeys(_).isEmpty).
-                flatMap(note ⇒ {
-                    val noteWordsIdxs = note.wordIndexes.toSet
-                    val key = PartKey(note, thisSen)
-
-                    val delCombOthers =
-                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
-
-                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
-                })
-
-        delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
-        addDeleted(thisSen, thisSen, swallowed)
-        swallowed.foreach(thisSen.removeNote)
-
-        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
-            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
-                groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
-                toSeq.sortBy(-_.size)
-
-//        val toksByIdx1 =
-//            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
-//                groupBy { case (idx, _) ⇒ idx }.
-//                map { case (idx, seq) ⇒ idx → seq.map { case (_, note) ⇒ note } }.
-//                toSeq.sortBy(_._2.size)
-//
-//        toksByIdx.foreach{ case (seq) ⇒
-//            println(s"toksByIdx seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
-//        }
-
-//        toksByIdx1.sortBy(_._1).foreach{ case (i, seq) ⇒
-//            println(s"toksByIdx1 ${i} seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
-//        }
-
-        val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
-        val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String]
-
-        var i = 'A'
-
-        val converted: Seq[Seq[String]] =
-            toksByIdx.map(seq ⇒ {
-                seq.map(
-                    n ⇒ {
-                        val s =
-                            dictBack.get(n) match {
-                                case Some(s) ⇒ s
-                                case None ⇒ {
-                                    val s = s"$i"
-
-                                    i = (i.toInt + 1).toChar
-
-                                    dict += s → n
-                                    dictBack += n → s
-
-                                    s
-                                }
-                            }
-
-                        s
-                    }
-                )
-            })
-
-        //val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
-
-        var sens =
-            if (delCombs.nonEmpty) {
-                val p = new ForkJoinPool()
-
-                val tmp = NCComboRecursiveTask.findCombinations(
-                    toksByIdx.map(_.asJava).asJava,
-                    p
-                )
-
-                p.shutdown()
-
-                val seq1 = tmp.asScala.map(_.asScala)
-
-                val sens =
-                    seq1.
-                        flatMap(p ⇒ {
-                            val delComb: Seq[NCNlpSentenceNote] = p
-
-                            val nsClone = thisSen.clone()
-
-                            // Saves deleted notes for sentence and their tokens.
-                            addDeleted(thisSen, nsClone, delComb)
-                            delComb.foreach(nsClone.removeNote)
-
-                            // Has overlapped notes for some tokens.
-                            require(
-                                !nsClone.exists(_.count(!_.isNlp) > 1),
-                                s"Invalid notes: ${nsClone.filter(_.count(!_.isNlp) > 1).mkString("|")}"
-                            )
-
-                            collapse0(nsClone)
-                        })
-
-                // It removes sentences which differ only by the 'direct' flag of their user tokens.
-                // `Direct` sentences have higher priority.
-                case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
-                case class Value(sentence: NCNlpSentence, directCount: Int)
-
-                val m = mutable.HashMap.empty[Key, Value]
-
-                sens.map(sen ⇒ {
-                    val notes = sen.flatten
-
-                    val sysNotes = notes.filter(_.isSystem)
-                    val nlpNotes = notes.filter(_.isNlp)
-                    val userNotes = notes.filter(_.isUser)
-
-                    def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
-                        seq.map(p ⇒
-                            // We have to delete some keys to make it possible to compare sentences.
-                            p.clone().filter(_._1 != "direct")
-                        )
-
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
-                }).
-                    foreach { case (key, sen, directCnt) ⇒
-                        m.get(key) match {
-                            case Some(v) ⇒
-                                // The best sentence is the one with `direct` synonyms.
-                                if (v.directCount > directCnt)
-                                    m += key → Value(sen, directCnt)
-                            case None ⇒ m += key → Value(sen, directCnt)
-                        }
-                    }
-
-                m.values.map(_.sentence).toSeq
-            }
-            else
-                collapse0(thisSen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
-
-        sens = sens.distinct
-
-        sens.foreach(sen ⇒
-            sen.foreach(tok ⇒
-                tok.size match {
-                    case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
-                    case 2 ⇒ require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok")
-                    case _ ⇒ require(requirement = false, s"Unexpected token notes count: $tok")
-                }
-            )
-        )
-
-        // Drops similar sentences (with same tokens structure).
-        // Among similar sentences we prefer one with minimal free words count.
-        sens.groupBy(_.flatten.filter(!_.isNlp).map(_.getKey(withIndexes = false))).
-            map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
-            toSeq
-    }
 }
 
 import org.apache.nlpcraft.common.nlp.NCNlpSentence._
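
The collapse machinery removed above (and re-homed in NCSentenceManager, see below) resolves
overlapping notes by a simple rule: the "longest" note wins, and on equal lengths a
priority-based tie-break applies. A minimal sketch of that selection rule, using a hypothetical
simplified Note type (not the actual NCNlpSentenceNote API):

    case class Note(noteType: String, wordIndexes: Set[Int], priority: Int)

    // Of two overlapping notes, prefer the one covering more words;
    // on equal coverage, fall back to the higher priority.
    def winner(a: Note, b: Note): Note =
        if (a.wordIndexes.size != b.wordIndexes.size) Seq(a, b).maxBy(_.wordIndexes.size)
        else Seq(a, b).maxBy(_.priority)
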
@@ -975,7 +214,10 @@ class NCNlpSentence(
       */
     def getDeletedNotes: Predef.Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = deletedNotes.toMap
 
-    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
-        collapseSentence(this, mdl, lastPhase)
-    }
+    /**
+      * Adds deleted overridden notes with their tokens.
+      *
+      * @param deletedNotes Deleted overridden notes with their tokens.
+      */
+    def addDeletedNotes(deletedNotes: Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]]): Unit =
+       this.deletedNotes ++= deletedNotes
 }
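
The net effect of the hunk above: NCNlpSentence no longer collapses itself and only keeps the
deleted-notes state, which the external manager now writes through addDeletedNotes. A minimal
usage sketch, assuming a sentence 'ns' and a collection 'delComb' of notes chosen for removal
(illustrative only, not the exact manager code):

    // Record the removed notes together with clones of their tokens,
    // then physically remove the notes from the sentence.
    val dropped = delComb.map(n ⇒ n.clone() → n.tokenIndexes.map(i ⇒ ns(i).clone())).toMap

    ns.addDeletedNotes(dropped)
    delComb.foreach(ns.removeNote)
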
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java
deleted file mode 100644
index db5f657..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/SyTest.java
+++ /dev/null
@@ -1,241 +0,0 @@
-package org.apache.nlpcraft.common.nlp;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.ForkJoinPool;
-import java.util.concurrent.RecursiveTask;
-import java.util.stream.IntStream;
-
-import static java.util.stream.Collectors.toList;
-import static java.util.Arrays.asList;
-import static java.util.stream.Collectors.toUnmodifiableList;
-
-public class SyTest {
-    public static class ComboSearch extends RecursiveTask<List<Long>> {
-        private static final long THRESHOLD = (long)Math.pow(2, 20);
-
-        private final long lo;
-
-        private final long hi;
-
-        private final long[] wordBits;
-
-        private final int[] wordCounts;
-
-        public ComboSearch(
-            long lo,
-            long hi,
-            long[] words,
-            int[] wordCounts
-        ) {
-            this.lo = lo;
-            this.hi = hi;
-            this.wordBits = words;
-            this.wordCounts = wordCounts;
-        }
-
-        public static <T> List<List<T>> findCombos(List<List<T>> inp, ForkJoinPool pool) {
-            List<List<T>> uniqueInp = inp.stream()
-                .filter(row -> inp.stream().noneMatch(it -> it != row && it.containsAll(row)))
-                .map(i -> i.stream().distinct().sorted().collect(toList()))
-                .collect(toList());
-
-            // Build dictionary of unique words.
-            List<T> dict = uniqueInp.stream()
-                .flatMap(Collection::stream)
-                .distinct()
-                .sorted()
-                .collect(toList());
-
-            System.out.println("uniqueInp="+uniqueInp.size());
-            System.out.println("dict="+dict.size());
-
-            if (dict.size() > Long.SIZE) {
-                // Note: Power set of 64 words results in 9223372036854775807 combinations.
-                throw new IllegalArgumentException("Can handle more than " + Long.SIZE + " unique words in the dictionary.");
-            }
-
-            // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
-            long[] wordBits = uniqueInp.stream()
-                .sorted(Comparator.comparingInt(List::size))
-                .mapToLong(row -> wordsToBits(row, dict))
-                .toArray();
-
-            // Cache words count per row.
-            int[] wordCounts = uniqueInp.stream()
-                .sorted(Comparator.comparingInt(List::size))
-                .mapToInt(List::size)
-                .toArray();
-
-            // Prepare Fork/Join task to iterate over the power set of all combinations.
-            int lo = 1;
-            long hi = (long)Math.pow(2, dict.size());
-
-            ComboSearch task = new ComboSearch(
-                lo,
-                hi,
-                wordBits,
-                wordCounts
-            );
-
-            return pool.invoke(task).stream()
-                .map(bits -> bitsToWords(bits, dict))
-                .collect(toList());
-        }
-
-        @Override
-        protected List<Long> compute() {
-            if (hi - lo <= THRESHOLD) {
-                return computeLocal();
-            } else {
-                return forkJoin();
-            }
-        }
-
-        private List<Long> computeLocal() {
-            List<Long> result = new ArrayList<>();
-
-            for (long comboBits = lo; comboBits < hi; comboBits++) {
-                boolean match = true;
-
-                // For each input row we check if subtracting the current combination of words
-                // from the input row would give us the expected result.
-                for (int j = 0; j < wordBits.length; j++) {
-                    // Get bitmask of how many words can be subtracted from the row.
-                    long commonBits = wordBits[j] & comboBits;
-
-                    int wordsToRemove = Long.bitCount(commonBits);
-
-                    // Check if there is more than 1 word remaining after subtraction.
-                    if (wordCounts[j] - wordsToRemove > 1) {
-                        // Skip this combination.
-                        match = false;
-
-                        break;
-                    }
-                }
-
-                if (match && !includes(comboBits, result)) {
-                    result.add(comboBits);
-                }
-            }
-
-            return result;
-        }
-
-        private List<Long> forkJoin() {
-            long mid = lo + hi >>> 1L;
-
-            ComboSearch t1 = new ComboSearch(lo, mid, wordBits, wordCounts);
-            ComboSearch t2 = new ComboSearch(mid, hi, wordBits, wordCounts);
-
-            t2.fork();
-
-            return merge(t1.compute(), t2.join());
-        }
-
-        private List<Long> merge(List<Long> l1, List<Long> l2) {
-            if (l1.isEmpty()) {
-                return l2;
-            } else if (l2.isEmpty()) {
-                return l1;
-            }
-
-            int size1 = l1.size();
-            int size2 = l2.size();
-
-            if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
-                // Minor optimization in case if one of the lists has only one element.
-                List<Long> list = size1 == 1 ? l2 : l1;
-                Long val = size1 == 1 ? l1.get(0) : l2.get(0);
-
-                if (!includes(val, list)) {
-                    list.add(val);
-                }
-
-                return list;
-            } else {
-                List<Long> result = new ArrayList<>(size1 + size2);
-
-                for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
-                    Long v1 = i < size1 ? l1.get(i) : null;
-                    Long v2 = i < size2 ? l2.get(i) : null;
-
-                    if (v1 != null && v2 != null) {
-                        if (containsAllBits(v1, v2)) {
-                            v1 = null;
-                        } else if (containsAllBits(v2, v1)) {
-                            v2 = null;
-                        }
-                    }
-
-                    if (v1 != null && !includes(v1, result)) {
-                        result.add(v1);
-                    }
-
-                    if (v2 != null && !includes(v2, result)) {
-                        result.add(v2);
-                    }
-                }
-
-                return result;
-            }
-        }
-
-        private static boolean includes(long bits, List<Long> allBits) {
-            for (long existing : allBits) {
-                if (containsAllBits(bits, existing)) {
-                    return true;
-                }
-            }
-
-            return false;
-        }
-
-        private static boolean containsAllBits(long bitSet1, long bitSet2) {
-            return (bitSet1 & bitSet2) == bitSet2;
-        }
-
-        private static <T> long wordsToBits(List<T> words, List<T> dict) {
-            long bits = 0;
-
-            for (int i = 0; i < dict.size(); i++) {
-                if (words.contains(dict.get(i))) {
-                    bits |= 1L << i;
-                }
-            }
-
-            return bits;
-        }
-
-        private static <T> List<T> bitsToWords(long bits, List<T> dict) {
-            List<T> words = new ArrayList<>(Long.bitCount(bits));
-
-            for (int i = 0; i < dict.size(); i++) {
-                if ((bits & 1L << i) != 0) {
-                    words.add(dict.get(i));
-                }
-            }
-
-            return words;
-        }
-    }
-
-    public static void main(String[] args) throws InterruptedException {
-        List<List<String>> words = IntStream.range(0, 35)
-            .mapToObj(i -> IntStream.range(0, i + 1).mapToObj(String::valueOf).collect(toUnmodifiableList()))
-            .collect(toUnmodifiableList());
-
-        long t = System.currentTimeMillis();
-
-        ForkJoinPool forkJoinPool = new ForkJoinPool();
-        final List<List<String>> combos = ComboSearch.findCombos(words, forkJoinPool);
-
-        System.out.println("size=" + combos.size());
-        System.out.println("time=" + (System.currentTimeMillis() - t));
-    }
-}
\ No newline at end of file
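
SyTest was a scratch benchmark for the combination search that now lives in NCComboHelper: each
unique note is mapped to one bit of a Long, so a candidate combination is a single bitmask and
the "does this row keep more than one word?" check reduces to bitwise operations. A compact
Scala sketch of the encoding, assuming at most 64 distinct items (the same limit the deleted
Java code enforced):

    def wordsToBits[T](words: Seq[T], dict: Seq[T]): Long =
        dict.zipWithIndex.foldLeft(0L) {
            case (bits, (w, i)) ⇒ if (words.contains(w)) bits | (1L << i) else bits
        }

    def bitsToWords[T](bits: Long, dict: Seq[T]): Seq[T] =
        dict.zipWithIndex.collect { case (w, i) if (bits & (1L << i)) != 0 ⇒ w }

    // 'combo' swallows 'other' iff other's bits are a subset of combo's bits.
    def containsAllBits(combo: Long, other: Long): Boolean = (combo & other) == other

With this encoding the fork/join task only has to split the [lo, hi) range of candidate bitmasks
and merge partial results, which is why the search parallelizes cleanly.
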
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index 19308e3..da13b07 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,6 +49,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 import resource.managed
 
 import java.io._
@@ -512,6 +513,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace {
             startedMgrs += NCProbeEnrichmentManager.start(span)
             startedMgrs += NCConnectionManager.start(span)
             startedMgrs += NCDialogFlowManager.start(span)
+            startedMgrs += NCSentenceManager.start(span)
         }
     }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 368f0c4..c328e57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,6 +43,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl._
 import org.apache.nlpcraft.probe.mgrs.nlp.validate._
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
 
 import java.io.Serializable
@@ -500,7 +501,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
                         s"]")
             }
 
-            nlpSen.clone().collapse(mdl.model, lastPhase = true).
+            NCSentenceManager.collapse(mdl.model, nlpSen.clone(), lastPhase = true).
                 // Sorted to support deterministic logs.
                 sortBy(p ⇒
                 p.map(p ⇒ {
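
The call-site change above is mechanical: collapse is no longer a method on the cloned sentence
but a service call that takes the model first. The shape of the migration, as taken from this
hunk:

    // before
    nlpSen.clone().collapse(mdl.model, lastPhase = true)

    // after
    NCSentenceManager.collapse(mdl.model, nlpSen.clone(), lastPhase = true)

The same rewrite is applied in NCModelEnricher below.
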
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 2a9dec0..82edf69 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -24,6 +24,7 @@ import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
 
 import java.io.Serializable
@@ -355,7 +356,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
 
             var permCnt = 0
-            lazy val collapsedSens = NCProbeVariants.convert(ns.srvReqId, mdl, ns.clone().collapse(mdl.model)).map(_.asScala)
+            lazy val collapsedSens = NCProbeVariants.convert(
+                ns.srvReqId,
+                mdl,
+                NCSentenceManager.collapse(mdl.model, ns.clone())
+            ).map(_.asScala)
 
             /**
               *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
similarity index 68%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 95a98a3..e0dd59c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -15,29 +15,30 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.common.nlp
+package org.apache.nlpcraft.probe.mgrs.sentence
 
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common.NCE
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
-import org.apache.nlpcraft.common.util.NCComboRecursiveTask
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{NCE, NCService, U}
 import org.apache.nlpcraft.model.NCModel
 
-import java.io.{Serializable => JSerializable}
+import java.io.{Serializable ⇒ JSerializable}
 import java.util
-import java.util.concurrent.ForkJoinPool
-import java.util.{Collections, Comparator, List => JList}
-import scala.collection.JavaConverters._
+import java.util.{List ⇒ JList}
+import scala.collection.JavaConverters.{asScalaBufferConverter, _}
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, Set, mutable}
 import scala.language.implicitConversions
 
-object NCNlpSentence extends LazyLogging {
-    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
+/**
+  * Sentence processing manager.
+  */
+object NCSentenceManager extends NCService {
+    @volatile private var pool: java.util.concurrent.ForkJoinPool = _
 
-    case class NoteKey(start: Int, end: Int)
-    case class TokenKey(id: String, start: Int, end: Int)
-    case class NoteLink(note: String, indexes: Seq[Int])
+    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
 
     case class PartKey(id: String, start: Int, end: Int) {
         require(start <= end)
@@ -73,7 +74,7 @@ object NCNlpSentence extends LazyLogging {
                 noteLinks ++=
                     (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
                         yield NoteLink(name, idxs.sorted)
-                    )
+                        )
             }
 
             if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
@@ -462,8 +463,8 @@ object NCNlpSentence extends LazyLogging {
                     }
 
                     if (fixed.forall(_.size == 1))
-                        // Flattens the two-dimensional array to one dimension,
-                        // so it should always be called regardless of the 'fixIndexesReferences' method.
+                    // Flattens the two-dimensional array to one dimension,
+                    // so it should always be called regardless of the 'fixIndexesReferences' method.
                         ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[JSerializable])
                     else
                         ok = false
@@ -521,9 +522,9 @@ object NCNlpSentence extends LazyLogging {
 
         val res =
             fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, history) &&
-            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
+                fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
+                fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
+                fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
 
         if (res) {
             // Validation (all indexes calculated well)
@@ -562,7 +563,7 @@ object NCNlpSentence extends LazyLogging {
         toks.flatten.filter(!_.isNlp).distinct
 
     private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
-        sen.deletedNotes ++= dels.map(n ⇒ {
+        sen.addDeletedNotes(dels.map(n ⇒ {
             val savedDelNote = n.clone()
             val savedDelToks = n.tokenIndexes.map(idx ⇒ thisSen(idx).clone())
 
@@ -573,7 +574,7 @@ object NCNlpSentence extends LazyLogging {
                 savedDelTok.remove(mainNote)
 
             savedDelNote → savedDelToks
-        })
+        }).toMap)
 
     /**
       * This collapser handles several tasks:
@@ -597,27 +598,27 @@ object NCNlpSentence extends LazyLogging {
         // We keep only one variant - the one with the `best` direct and sparsity parameters,
         // other variants for these words are redundant.
         val redundant: Seq[NCNlpSentenceNote] =
-            thisSen.flatten.filter(!_.isNlp).distinct.
-                groupBy(_.getKey()).
-                map(p ⇒ p._2.sortBy(p ⇒
-                    (
-                        // System notes don't have such flags.
-                        if (p.isUser) {
-                            if (p.isDirect)
-                                0
-                            else
-                                1
-                        }
-                        else
-                            0,
-                        if (p.isUser)
-                            p.sparsity
-                        else
+        thisSen.flatten.filter(!_.isNlp).distinct.
+            groupBy(_.getKey()).
+            map(p ⇒ p._2.sortBy(p ⇒
+                (
+                    // System notes don't have such flags.
+                    if (p.isUser) {
+                        if (p.isDirect)
                             0
-                    )
-                )).
-                flatMap(_.drop(1)).
-                toSeq
+                        else
+                            1
+                    }
+                    else
+                        0,
+                    if (p.isUser)
+                        p.sparsity
+                    else
+                        0
+                )
+            )).
+            flatMap(_.drop(1)).
+            toSeq
 
         redundant.foreach(thisSen.removeNote)
 
@@ -626,8 +627,6 @@ object NCNlpSentence extends LazyLogging {
                 flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
                 distinct
 
-        println("delCombs="+delCombs.mkString("\n"))
-
         // Optimization. Deletes all wholly swallowed notes.
         val links = getLinks(thisSen.flatten)
 
@@ -651,89 +650,55 @@ object NCNlpSentence extends LazyLogging {
         addDeleted(thisSen, thisSen, swallowed)
         swallowed.foreach(thisSen.removeNote)
 
-        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
+        val toksByIdx: Seq[Set[NCNlpSentenceNote]] =
             delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                 groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
+                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                 toSeq.sortBy(-_.size)
 
-//        val toksByIdx1 =
-//            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
-//                groupBy { case (idx, _) ⇒ idx }.
-//                map { case (idx, seq) ⇒ idx → seq.map { case (_, note) ⇒ note } }.
-//                toSeq.sortBy(_._2.size)
-//
-//        toksByIdx.foreach{ case (seq) ⇒
-//            println(s"toksByIdx seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
-//        }
-
-//        toksByIdx1.sortBy(_._1).foreach{ case (i, seq) ⇒
-//            println(s"toksByIdx1 ${i} seq=${seq.map(i ⇒ s"${i.noteType} ${i.wordIndexes.mkString(",")}").mkString(" | ")}")
-//        }
-
-        val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
-        val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String]
-
-        var i = 'A'
-
-        val converted: Seq[Seq[String]] =
-            toksByIdx.map(seq ⇒ {
-                seq.map(
-                    n ⇒ {
-                        val s =
-                            dictBack.get(n) match {
-                                case Some(s) ⇒ s
-                                case None ⇒ {
-                                    val s = s"$i"
-
-                                    i = (i.toInt + 1).toChar
-
-                                    dict += s → n
-                                    dictBack += n → s
-
-                                    s
-                                }
-                            }
-
-                        s
-                    }
-                )
-            })
-
-        //val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
+//        val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
 
         var sens =
             if (delCombs.nonEmpty) {
-                val p = new ForkJoinPool()
-
-                val tmp = NCComboRecursiveTask.findCombinations(
-                    toksByIdx.map(_.asJava).asJava,
-                    p
-                )
-
-                p.shutdown()
-
-                val seq1 = tmp.asScala.map(_.asScala)
+                val deleted = mutable.ArrayBuffer.empty[Set[NCNlpSentenceNote]]
+
+//                val combs = (minDelSize to delCombs.size).
+//                    flatMap(i ⇒
+//                        delCombs.combinations(i).
+//                            filter(delComb ⇒
+//                                !toksByIdx.exists(
+//                                    rec ⇒
+//                                        rec.size - delCombs.size <= 1 &&
+//                                            rec.count(note ⇒ !delComb.contains(note)) > 1
+//                                )
+//                            )
+//                    ).
+//                    sortBy(_.size).
+//                    map(_.toSet)
+//
+                val combs = NCComboHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala)
 
                 val sens =
-                    seq1.
-                        flatMap(p ⇒ {
-                            val delComb: Seq[NCNlpSentenceNote] = p
+                    combs.
+                        flatMap(delComb ⇒
+                            // Skip: already processed with a subset of the same deleted notes.
+                            if (!deleted.exists(_.subsetOf(delComb.toSet))) {
+                                val nsClone = thisSen.clone()
 
-                            val nsClone = thisSen.clone()
+                                // Saves deleted notes for sentence and their tokens.
+                                addDeleted(thisSen, nsClone, delComb)
+                                delComb.foreach(nsClone.removeNote)
 
-                            // Saves deleted notes for sentence and their tokens.
-                            addDeleted(thisSen, nsClone, delComb)
-                            delComb.foreach(nsClone.removeNote)
+                                // Has overlapped notes for some tokens.
+                                require(!nsClone.exists(_.count(!_.isNlp) > 1))
 
-                            // Has overlapped notes for some tokens.
-                            require(
-                                !nsClone.exists(_.count(!_.isNlp) > 1),
-                                s"Invalid notes: ${nsClone.filter(_.count(!_.isNlp) > 1).mkString("|")}"
-                            )
+                                deleted += delComb.toSet
 
-                            collapse0(nsClone)
-                        })
+                                collapse0(nsClone)
+                            }
+                            else
+                                None
+                        )
 
                 // It removes sentences which differ only by the 'direct' flag of their user tokens.
                 // `Direct` sentences have higher priority.
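
The rewritten combination loop in the hunk above adds subset pruning on top of NCComboHelper's
output: any candidate whose deleted-note set contains an already-processed set is skipped as
redundant. A generic sketch of that guard, assuming candidates arrive sorted by size (smallest
first):

    import scala.collection.mutable

    // Keep only candidates that do not contain an already-accepted set.
    def prune[T](cands: Seq[Set[T]]): List[Set[T]] = {
        val kept = mutable.ArrayBuffer.empty[Set[T]]

        for (c ← cands if !kept.exists(_.subsetOf(c)))
            kept += c

        kept.toList
    }
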
@@ -790,192 +755,23 @@ object NCNlpSentence extends LazyLogging {
             map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
             toSeq
     }
-}
-
-import org.apache.nlpcraft.common.nlp.NCNlpSentence._
 
-/**
-  * Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
-  * each note is a collection of KV pairs.
-  *
-  * @param srvReqId Server request ID.
-  * @param text Normalized text.
-  * @param enabledBuiltInToks Enabled built-in tokens.
-  * @param tokens Initial buffer.
-  * @param deletedNotes Deleted overridden notes with their tokens.
-  */
-class NCNlpSentence(
-    val srvReqId: String,
-    val text: String,
-    val enabledBuiltInToks: Set[String],
-    override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
-    private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
-    private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
-    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
-) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
-    @transient
-    private var hash: java.lang.Integer = _
-
-    private def calcHash(): Int =
-        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
-
-    // Deep copy.
-    override def clone(): NCNlpSentence =
-        new NCNlpSentence(
-            srvReqId,
-            text,
-            enabledBuiltInToks,
-            tokens.map(_.clone()),
-            deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
-            initNlpNotes = initNlpNotes
-        )
+    override def start(parent: Span): NCService = {
+        ackStarting()
 
-    /**
-      * Utility method that gets set of notes for given note type collected from
-      * tokens in this sentence. Notes are sorted in the same order they appear
-      * in this sentence.
-      *
-      * @param noteType Note type.
-      */
-    def getNotes(noteType: String): Seq[NCNlpSentenceNote] = this.flatMap(_.getNotes(noteType)).distinct
+        pool = new java.util.concurrent.ForkJoinPool()
 
-    /**
-      * Utility method that removes note with given ID from all tokens in this sentence.
-      * No-op if such note wasn't found.
-      *
-      * @param note Note.
-      */
-    def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
-
-    //noinspection HashCodeUsesVar
-    override def hashCode(): Int = {
-        if (hash == null)
-            hash = calcHash()
-
-        hash
+        ackStarted()
     }
 
-    def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
-        val fixed = note.clone(kvs: _*)
+    override def stop(parent: Span): Unit = {
+        ackStopping()
 
-        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
-            t.remove(note)
-            t.add(fixed)
-        })
+        U.shutdownPool(pool)
 
-        hash = null
+        ackStopped()
     }
 
-    /**
-      * Returns whether two notes are equal (or similar). The only differences ignored are stopword tokens.
-      *
-      * @param n1 First note.
-      * @param n2 Second note.
-      */
-    def notesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-        if (n1.noteType != n2.noteType)
-            false
-        else {
-            val stopIdxs = this.filter(_.isStopWord).map(_.index)
-
-            // One possible difference - stopwords indexes.
-            def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
-                val set1 = n1.wordIndexes.toSet
-                val set2 = n2.wordIndexes.toSet
-
-                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(stopIdxs.contains)
-            }
-
-            def wordsEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-                wordsEqualOrSimilar0(n1, n2) || wordsEqualOrSimilar0(n2, n1)
-
-            def tokensEqualOrSimilar0(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
-                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(_.isStopWord)
-
-            def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
-                tokensEqualOrSimilar0(set1, set2) || tokensEqualOrSimilar0(set2, set1)
-
-            def getList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
-                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[Int]].asScala.
-                    map(this (_)).toSet
-
-            def getListList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
-                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[JList[Int]]].asScala.
-                    flatMap(_.asScala.map(this (_))).toSet
-
-            def referencesEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
-                require(n1.noteType == n2.noteType)
-
-                n1.noteType match {
-                    case "nlpcraft:sort" ⇒
-                        tokensEqualOrSimilar(getListList(n1, "subjindexes"), getListList(n2, "subjindexes")) &&
-                            tokensEqualOrSimilar(getListList(n1, "byindexes"), getListList(n2, "byindexes"))
-                    case "nlpcraft:limit" ⇒
-                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
-                    case "nlpcraft:reference" ⇒
-                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
-
-                    case _ ⇒ true
-                }
-            }
-
-            def referencesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-                referencesEqualOrSimilar0(n1, n2) || referencesEqualOrSimilar0(n2, n1)
-
-            def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = n.getKey(withIndexes = false, withReferences = false)
-
-            getUniqueKey0(n1) == getUniqueKey0(n2) && wordsEqualOrSimilar(n1, n2) && referencesEqualOrSimilar(n1, n2)
-        }
-
-    override def equals(obj: Any): Boolean = obj match {
-        case x: NCNlpSentence ⇒
-            tokens == x.tokens &&
-                srvReqId == x.srvReqId &&
-                text == x.text &&
-                enabledBuiltInToks == x.enabledBuiltInToks
-
-        case _ ⇒ false
-    }
-
-    /**
-      *
-      */
-    def saveNlpNotes(): Unit =
-        initNlpNotes = this.map(t ⇒ NoteKey(t.startCharIndex, t.endCharIndex) → t.getNlpNote).toMap
-
-    /**
-      *
-      * @return
-      */
-    def findInitialNlpNote(startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceNote] =
-        initNlpNotes.get(NoteKey(startCharIndex, endCharIndex))
-
-    /**
-      *
-      * @param nlp
-      */
-    def addNlpToken(nlp: NCNlpSentenceToken): Unit = {
-        require(nlp.size <= 2)
-
-        nlp.foreach(n ⇒ nlpTokens += TokenKey(n.noteType, nlp.startCharIndex, nlp.endCharIndex) → nlp)
-    }
-
-    /**
-      *
-      * @param noteType
-      * @param startCharIndex
-      * @param endCharIndex
-      * @return
-      */
-    def findNlpToken(noteType: String, startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceToken] =
-        nlpTokens.get(TokenKey(noteType, startCharIndex, endCharIndex))
-
-    /**
-      *
-      */
-    def getDeletedNotes: Predef.Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = deletedNotes.toMap
-
-    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
-        collapseSentence(this, mdl, lastPhase)
-    }
+    def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] =
+        collapseSentence(sen, mdl, lastPhase)
 }
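
The notesEqualOrSimilar logic removed above boils down to a stopword-tolerant set comparison: two notes match when their word-index sets are equal, or when the larger set differs from the smaller one only by stopword indexes. A minimal standalone sketch of that core test (hypothetical names, not the module's actual API):

    import java.util.HashSet;
    import java.util.Set;

    public class StopwordTolerantEq {
        // True if the sets are equal, or one is a subset of the other and
        // the extra indexes are all stopwords.
        static boolean equalOrSimilar(Set<Integer> a, Set<Integer> b, Set<Integer> stopIdxs) {
            return oneWay(a, b, stopIdxs) || oneWay(b, a, stopIdxs);
        }

        private static boolean oneWay(Set<Integer> s1, Set<Integer> s2, Set<Integer> stopIdxs) {
            if (!s2.containsAll(s1))
                return false;

            Set<Integer> diff = new HashSet<>(s2);
            diff.removeAll(s1);

            return stopIdxs.containsAll(diff); // Only stopword indexes may differ.
        }

        public static void main(String[] args) {
            Set<Integer> n1 = Set.of(1, 2);
            Set<Integer> n2 = Set.of(1, 2, 3);                      // Index 3 is extra.
            System.out.println(equalOrSimilar(n1, n2, Set.of(3)));  // true: 3 is a stopword
            System.out.println(equalOrSimilar(n1, n2, Set.of(4)));  // false: 3 is not
        }
    }
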
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java
deleted file mode 100644
index 69d9ab4..0000000
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/SyTest.java
+++ /dev/null
@@ -1,278 +0,0 @@
-package org.apache.nlpcraft.probe.mgrs.nlp.enrichers;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.NavigableSet;
-import java.util.Optional;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
-
-import static java.util.Arrays.asList;
-import static java.util.stream.Collectors.toList;
-import static java.util.stream.Collectors.toSet;
-
-public class SyTest {
-    public static void main(String[] args) {
-//        List<List<String>> words = asList(
-//            asList("A", "B", "C"),
-//            asList("B", "C", "D"),
-//            asList("B", "D")
-//        );
-        List<List<String>> words = asList(
-            asList("A", "B"),
-            asList("C", "B"),
-            asList("D", "E"),
-            asList("D", "F"),
-            asList("G", "H"),
-            asList("I", "H"),
-            asList("J", "K"),
-            asList("L", "K"),
-            asList("M", "N"),
-            asList("M", "O"),
-            asList("P", "Q"),
-            asList("P", "R"),
-            asList("S", "T"),
-            asList("S", "U"),
-            asList("V", "W"),
-            asList("X", "W")
-            ,
-            asList("Y", "Z"),
-            asList("A1", "A2"),
-            asList("A3", "A3"),
-            asList("A4", "A5", "A6")
-        );
-
-        System.out.println(
-            "Dictionary size:"
-                + words.stream()
-                .flatMap(Collection::stream)
-                .distinct()
-                .count()
-        );
-
-        System.out.println("===== Performance =====");
-
-        for (int i = 0; i < 1; i++) {
-            long t = System.currentTimeMillis();
-
-            Set<Set<String>> combos = findCombos(words);
-
-
-
-            System.out.println("Iteration " + i + " Time: " + (System.currentTimeMillis() - t) + ", resCnt=" + combos.size());
-        }
-
-        if (true) {
-            return;
-        }
-
-        Set<Set<String>> combos = findCombos(words);
-
-        System.out.println();
-        System.out.println("===== Result =====");
-        System.out.println("Total combos: " + combos.size());
-        System.out.println();
-//        combos.stream()
-//            .sorted(Comparator.comparing(Collection::size))
-//            .forEach(combo ->
-//                print(words, combo)
-//            );
-    }
-
-    public static <T extends Comparable<T>> Set<Set<T>> findCombos(List<List<T>> inp) {
-
-
-        List<List<T>> uniqueInp = inp.stream()
-            .filter(row -> inp.stream().noneMatch(it -> it != row && it.containsAll(row)))
-            .map(i -> i.stream().distinct().sorted().collect(toList()))
-            .collect(toList());
-
-        // Build dictionary of unique words.
-        List<T> dict = uniqueInp.stream()
-            .flatMap(Collection::stream)
-            .distinct()
-            .sorted()
-            .collect(toList());
-
-        if (dict.size() > Integer.SIZE) {
-            // Note: Power set of 32 words results in 4294967296 combinations.
-            throw new IllegalArgumentException("Cannot handle more than " + Integer.SIZE + " unique words in the dictionary.");
-        }
-
-        // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
-        int[] wordBits = uniqueInp.stream()
-            .sorted(Comparator.comparingInt(List::size))
-            .mapToInt(row -> wordsToBits(row, dict))
-            .toArray();
-
-        // Cache words count per row.
-        int[] wordCounts = uniqueInp.stream()
-            .sorted(Comparator.comparingInt(List::size))
-            .mapToInt(List::size)
-            .toArray();
-
-        int min = 1;
-        int max = (int)Math.pow(2, dict.size()) - 1;
-
-        int batchFactor = 100;
-        int threads = 13;
-
-        ExecutorService pool = Executors.newFixedThreadPool(threads);
-        CountDownLatch cdl = new CountDownLatch(batchFactor);
-
-        int divRes = max / batchFactor;
-        int divRest = max % batchFactor;
-
-        int to = 0;
-
-        List<Integer> result = new CopyOnWriteArrayList<>();
-
-        for (int k = 0; k < batchFactor; k++) {
-            to += divRes;
-
-            if (k == batchFactor - 1) { // Add the division remainder to the last batch.
-                to += divRest;
-            }
-
-            int toFinal = to;
-            int fromFinal = min + k * divRes;
-
-            pool.execute(
-                () -> {
-                    List<Integer> locRes = new ArrayList<>();
-
-                    for (int comboBits = fromFinal; comboBits < toFinal; comboBits++) {
-                        boolean match = true;
-
-                        // For each input row we check if subtracting the current combination of words
-                        // from the input row would give us the expected result.
-                        for (int j = 0; j < wordBits.length; j++) {
-                            // Get bitmask of how many words can be subtracted from the row.
-                            int commonBits = wordBits[j] & comboBits;
-
-                            int wordsToRemove = Integer.bitCount(commonBits);
-
-                            // Check if more than one word remains after subtraction.
-                            if (wordCounts[j] - wordsToRemove > 1) {
-                                // Skip this combination.
-                                match = false;
-
-                                break;
-                            }
-                        }
-
-                        if (match && !includes(comboBits, locRes)) {
-                            locRes.add(comboBits);
-                        }
-                    }
-
-                    result.addAll(locRes);
-
-                    cdl.countDown();
-                }
-            );
-        }
-
-// Iterate over the power set.
-
-        //pool.shutdown();
-        try {
-            cdl.await(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
-            //pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
-        } catch (InterruptedException e) {
-            e.printStackTrace();
-        }
-
-        // Convert found results from bitmasks back to words.
-        TreeSet<Set<T>> treeSet = new TreeSet<>(Comparator.comparingInt(Set::size));
-
-        treeSet.addAll(result.stream().map(bits -> bitsToWords(bits, dict)).collect(toSet()));
-
-        Set<Set<T>> normCombs = new HashSet<>();
-
-        for (Set<T> set : treeSet) {
-            boolean b = true;
-
-            for (Set<T> added : normCombs) {
-                if (added.containsAll(set)) {
-                    b = false;
-
-                    break;
-                }
-            }
-
-            if (b) {
-                normCombs.add(set);
-            }
-        }
-
-        return normCombs;
-
-    }
-
-    private static <T> Set<Set<T>> squeeze(Set<Set<T>> combs) {
-        Set<Set<T>> normCombs = new HashSet<>();
-
-        combs.stream().sorted(Comparator.comparingInt(Set::size)).forEach(comb -> {
-            // Skips already added shorter variants.
-            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
-                normCombs.add(comb);
-            }
-        });
-
-        return normCombs;
-    }
-
-
-    private static boolean includes(int bits, List<Integer> allBits) {
-        for (int existing : allBits) {
-            if ((bits & existing) == existing) {
-                return true;
-            }
-        }
-
-        return false;
-    }
-
-    private static <T> int wordsToBits(List<T> words, List<T> dict) {
-        int bits = 0;
-
-        for (int i = 0; i < dict.size(); i++) {
-            if (words.contains(dict.get(i))) {
-                bits |= 1 << i;
-            }
-        }
-
-        return bits;
-    }
-
-    private static <T> Set<T> bitsToWords(int bits, List<T> dict) {
-        Set<T> words = new HashSet<>(Integer.bitCount(bits));
-
-        for (int i = 0; i < dict.size(); i++) {
-            if ((bits & 1 << i) != 0) {
-                words.add(dict.get(i));
-            }
-        }
-
-        return words;
-    }
-
-    private static void print(List<List<String>> inp, List<String> combo) {
-        System.out.println("==== " + combo + "(" + combo.size() + ')');
-        inp.stream().forEach(row -> {
-            Set<String> s = new TreeSet<>(row);
-            s.removeAll(combo);
-            System.out.println(s);
-        });
-    }
-}
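
SyTest above encodes each input row as a bitmask over the dictionary of unique words, so subtracting a candidate combination from a row reduces to one AND plus a popcount. A small standalone sketch of the encoding and the per-row check (illustrative names, assuming at most 32 dictionary words):

    import java.util.List;

    public class BitmaskSketch {
        // Encode a row as a bitmask: bit i is set iff dict.get(i) occurs in the row.
        static int wordsToBits(List<String> row, List<String> dict) {
            int bits = 0;
            for (int i = 0; i < dict.size(); i++)
                if (row.contains(dict.get(i)))
                    bits |= 1 << i;
            return bits;
        }

        // A combination fits a row if removing their common words leaves <= 1 word.
        static boolean fits(int rowBits, int rowSize, int comboBits) {
            int removed = Integer.bitCount(rowBits & comboBits);
            return rowSize - removed <= 1;
        }

        public static void main(String[] args) {
            List<String> dict = List.of("A", "B", "C", "D");
            int row = wordsToBits(List.of("A", "B", "C"), dict);  // 0b0111
            int combo = wordsToBits(List.of("B", "C"), dict);     // 0b0110
            System.out.println(fits(row, 3, combo));              // true: only "A" remains
        }
    }
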
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
index afdeaab..b354533 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -48,6 +48,6 @@ class NCNestedTestModel4 extends NCModelAdapter(
   */
 @NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
 class NCEnricherNestedModelSpec4 extends NCTestContext {
-    @Test1
+    @Test
     def test(): Unit = checkIntent("the a " * 8, "onE2")
 }
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java
deleted file mode 100644
index 398e858..0000000
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java
+++ /dev/null
@@ -1,135 +0,0 @@
-package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model;
-
-import com.google.common.collect.ImmutableSet;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-public class Test1 {
-    private static List<Set<String>> ROWS =
-        Arrays.asList(
-            ImmutableSet.of("A", "B", "C"),
-            ImmutableSet.of("B", "C", "D"),
-            ImmutableSet.of("B", "D")
-        );
-
-//    // Uncomment to try: it runs for a very long time. The normalized result is 256.
-//    private static List<Set<String>> ROWS = Arrays.asList(
-//        ImmutableSet.of("A", "B"),
-//        ImmutableSet.of("C", "B"),
-//        ImmutableSet.of("D", "E"),
-//        ImmutableSet.of("D", "F"),
-//        ImmutableSet.of("G", "H"),
-//        ImmutableSet.of("I", "H"),
-//        ImmutableSet.of("J", "K"),
-//        ImmutableSet.of("L", "K"),
-//        ImmutableSet.of("M", "N"),
-//        ImmutableSet.of("M", "O"),
-//        ImmutableSet.of("P", "Q"),
-//        ImmutableSet.of("P", "R"),
-//        ImmutableSet.of("S", "T"),
-//        ImmutableSet.of("S", "U"),
-//        ImmutableSet.of("V", "W"),
-//        ImmutableSet.of("X", "W")
-//    );
-
-    private static Set<String> ALL = ROWS.stream().flatMap(Collection::stream).collect(Collectors.toSet());
-
-    // Goal: find the minimal set of combinations with the following property:
-    // after removing the combination's values from each row, every row contains at most one element.
-
-    // Expected solutions: [C, B], [A, C, D], [A, B, D]
-    // Example:
-    // list - [C, B] = {{A}, {D}, {D}}
-    // list - [A, C, D] = {{B}, {B}, {B}}
-    // list - [A, B, D] = {{C}, {C}, {}}
-
-
-    // Additionally, redundant solutions are excluded: [A, B, C] ([C, B] is enough), [A, B, C, D] ([A, C, D] is enough), etc.
-
-    // Simplest approach.
-    public static void main(String[] args) {
-        long t = System.currentTimeMillis();
-
-        System.out.println("1. start [time=" + (System.currentTimeMillis() - t) + ']');
-
-        // 1. Extends.
-        List<Set<String>> extRows = extendNulls();
-
-        // 2. All valid row selections (cartesian product).
-        // Alternatively, permute manually, e.g. https://stackoverflow.com/questions/17192796/generate-all-combinations-from-multiple-lists
-        Set<List<String>> allSingleOrNullRows = com.google.common.collect.Sets.cartesianProduct(extRows);
-
-        System.out.println("2. permuted [size=" + allSingleOrNullRows.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
-
-        // 3. Collects all suitable combinations.
-        Set<Set<String>> combs =
-            allSingleOrNullRows.
-            stream().
-            // Computes which combination had to be removed to yield this single-or-empty selection (that combination is the candidate).
-            map(row -> {
-                Set<String> copy = new HashSet<>(ALL);
-
-                copy.removeAll(row);
-
-                return copy;
-            }).
-            distinct().
-            filter(Test1::isSuitable).
-            collect(Collectors.toSet());
-
-        System.out.println("3. calculated [size=" + combs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
-
-        // 4. Normalize variants (keep only the minimal valid subsets, see the task description).
-        Set<Set<String>> normCombs = squeeze(combs);
-
-        System.out.println("4. normalized [size=" + normCombs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
-        System.out.println("Norm results:" + normCombs);
-    }
-
-    /**
-     * Removes `candidate` from each row of ROWS.
-     * Returns true if the resulting list doesn't contain any row with more than one element.
-     * <p>
-     * If ROWS is {{a, b}, {a, c}}: candidate {a, b} is suitable, candidate {a} is suitable, candidate {b} is not.
-     */
-    private static boolean isSuitable(Set<String> candidate) {
-        for (Set<String> row : ROWS) {
-            Set<String> copy = new HashSet<>(row);
-
-            copy.removeAll(candidate);
-
-            if (copy.size() > 1) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-
-    private static Set<Set<String>> squeeze(Set<Set<String>> combs) {
-        Set<Set<String>> normCombs = new HashSet<>();
-
-        for (Set<String> comb : combs.stream().sorted(Comparator.comparingInt(Set::size)).collect(Collectors.toList())) {
-            // Skips already added shorter variants.
-            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
-                normCombs.add(comb);
-            }
-        }
-        return normCombs;
-    }
-
-    // Adds "" which means empty row. For example for small row it returns {{A, B, C, ""}, {B, C, D, ""}, {B, D, ""} }
-    private static List<Set<String>> extendNulls() {
-        return ROWS.stream().map(
-            p -> Stream.concat(p.stream(), Stream.of("")).collect(Collectors.toSet())
-        ).collect(Collectors.toList());
-    }
-}
-
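
The expected solutions listed in Test1's comments can be checked directly with the same subtraction rule. A quick standalone verification (illustrative, not part of the deleted test):

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class SolutionCheck {
        public static void main(String[] args) {
            List<Set<String>> rows = List.of(
                Set.of("A", "B", "C"), Set.of("B", "C", "D"), Set.of("B", "D"));

            // A valid combination leaves at most one word in every row.
            for (Set<String> cand : List.of(
                Set.of("C", "B"), Set.of("A", "C", "D"), Set.of("A", "B", "D"))) {
                boolean ok = rows.stream().allMatch(row -> {
                    Set<String> rest = new HashSet<>(row);
                    rest.removeAll(cand);
                    return rest.size() <= 1;
                });
                System.out.println(cand + " -> " + ok); // prints true for all three
            }
        }
    }
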
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java
deleted file mode 100644
index b8e5e42..0000000
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java
+++ /dev/null
@@ -1,114 +0,0 @@
-package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model;
-
-import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.Sets;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.stream.Collectors;
-
-public class Test2 {
-//    private static List<Set<String>> ROWS =
-//        Arrays.asList(
-//            ImmutableSet.of("A", "B", "C"),
-//            ImmutableSet.of("B", "C", "D"),
-//            ImmutableSet.of("B", "D")
-//        );
-
-    // The larger input: runs for a long time. The normalized result is 256.
-    private static List<Set<String>> ROWS = Arrays.asList(
-        ImmutableSet.of("A", "B"),
-        ImmutableSet.of("C", "B"),
-        ImmutableSet.of("D", "E"),
-        ImmutableSet.of("D", "F"),
-        ImmutableSet.of("G", "H"),
-        ImmutableSet.of("I", "H"),
-        ImmutableSet.of("J", "K"),
-        ImmutableSet.of("L", "K"),
-        ImmutableSet.of("M", "N"),
-        ImmutableSet.of("M", "O"),
-        ImmutableSet.of("P", "Q"),
-        ImmutableSet.of("P", "R"),
-        ImmutableSet.of("S", "T"),
-        ImmutableSet.of("S", "U"),
-        ImmutableSet.of("V", "W"),
-        ImmutableSet.of("X", "W")
-    );
-
-    private static Set<String> ALL = ROWS.stream().flatMap(Collection::stream).collect(Collectors.toSet());
-
-    // Goal: find the minimal set of combinations with the following property:
-    // after removing the combination's values from each row, every row contains at most one element.
-
-    // Expected solutions: [C, B], [A, C, D], [A, B, D]
-    // Example:
-    // list - [C, B] = {{A}, {D}, {D}}
-    // list - [A, C, D] = {{B}, {B}, {B}}
-    // list - [A, B, D] = {{C}, {C}, {}}
-
-
-    // Additionally, redundant solutions are excluded: [A, B, C] ([C, B] is enough), [A, B, C, D] ([A, C, D] is enough), etc.
-
-    // Simplest approach.
-    public static void main(String[] args) {
-        long t = System.currentTimeMillis();
-
-        System.out.println("1. start [time=" + (System.currentTimeMillis() - t) + ']');
-
-        Set<Set<String>> combs = new HashSet<>();
-
-        for (int i = 1; i < ALL.size(); i++) {
-            combs.addAll(
-                Sets.combinations(ALL, i).
-                    stream().
-                    filter(Test2::isSuitable).
-                    collect(Collectors.toSet())
-            );
-        }
-
-        System.out.println("2. calculated [size=" + combs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
-
-        // Normalize variants (keeps only minimal valid subsets, see task description)
-        Set<Set<String>> normCombs = squeeze(combs);
-
-        System.out.println("3. normalized [size=" + normCombs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
-        System.out.println("Norm results:" + normCombs);
-    }
-
-    private static Set<Set<String>> squeeze(Set<Set<String>> combs) {
-        Set<Set<String>> normCombs = new HashSet<>();
-
-        for (Set<String> comb : combs.stream().sorted(Comparator.comparingInt(Set::size)).collect(Collectors.toList())) {
-            // Skips already added shorter variants.
-            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
-                normCombs.add(comb);
-            }
-        }
-        return normCombs;
-    }
-
-    /**
-     * Removes `candidate` from each row of ROWS.
-     * Returns true if the resulting list doesn't contain any row with more than one element.
-     * <p>
-     * If ROWS is {{a, b}, {a, c}}: candidate {a, b} is suitable, candidate {a} is suitable, candidate {b} is not.
-     */
-    private static boolean isSuitable(Set<String> candidate) {
-        for (Set<String> row : ROWS) {
-            Set<String> copy = new HashSet<>(row);
-
-            copy.removeAll(candidate);
-
-            if (copy.size() > 1) {
-                return false;
-            }
-        }
-
-        return true;
-    }
-}
-
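
All three deleted classes share the same squeeze normalization: sort candidates by size and keep a candidate only if it is not a superset of one already kept, so only minimal solutions survive. A standalone sketch of that step (illustrative, behavior-equivalent to the deleted helper):

    import java.util.Comparator;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class Squeeze {
        static <T> Set<Set<T>> squeeze(Set<Set<T>> combs) {
            Set<Set<T>> norm = new HashSet<>();

            combs.stream()
                .sorted(Comparator.comparingInt(Set::size)) // Smallest candidates first.
                .forEach(comb -> {
                    // Skip supersets of an already-kept (shorter) solution.
                    if (norm.stream().noneMatch(comb::containsAll))
                        norm.add(comb);
                });

            return norm;
        }

        public static void main(String[] args) {
            Set<Set<String>> combs = new HashSet<>(List.of(
                Set.of("B", "C"), Set.of("A", "B", "C"), Set.of("A", "C", "D")));

            // [A, B, C] is dropped: it is a superset of [B, C].
            System.out.println(squeeze(combs));
        }
    }
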


[incubator-nlpcraft] 02/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 3b5a380390d346b24391311c7ec771c4ff9f1fac
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sun Mar 7 15:45:09 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 25 +++++++++++++------
 .../nlpcraft/common/util/NCComboRecursiveTask.java | 29 ++++++++--------------
 .../model/NCEnricherNestedModelSpec.scala          | 18 +++++++-------
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 9d9cb98..95a98a3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -626,6 +626,8 @@ object NCNlpSentence extends LazyLogging {
                 flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
                 distinct
 
+        println("delCombs="+delCombs.mkString("\n"))
+
         // Optimization. Deletes all wholly swallowed notes.
         val links = getLinks(thisSen.flatten)
 
@@ -670,6 +672,7 @@ object NCNlpSentence extends LazyLogging {
 //        }
 
         val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
+        val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String]
 
         var i = 'A'
 
@@ -677,11 +680,20 @@ object NCNlpSentence extends LazyLogging {
             toksByIdx.map(seq ⇒ {
                 seq.map(
                     n ⇒ {
-                        val s = s"$i"
+                        val s =
+                            dictBack.get(n) match {
+                                case Some(s) ⇒ s
+                                case None ⇒ {
+                                    val s = s"$i"
+
+                                    i = (i.toInt + 1).toChar
 
-                        i = (i.toInt + 1).toChar
+                                    dict += s → n
+                                    dictBack += n → s
 
-                        dict += s → n
+                                    s
+                                }
+                            }
 
                         s
                     }
@@ -695,16 +707,13 @@ object NCNlpSentence extends LazyLogging {
                 val p = new ForkJoinPool()
 
                 val tmp = NCComboRecursiveTask.findCombinations(
-                    converted.map(_.asJava).asJava,
-                    new Comparator[String]() {
-                        override def compare(n1: String, n2: String): Int = n1.compareTo(n2)
-                    },
+                    toksByIdx.map(_.asJava).asJava,
                     p
                 )
 
                 p.shutdown()
 
-                val seq1 = tmp.asScala.map(_.asScala.map(dict))
+                val seq1 = tmp.asScala.map(_.asScala)
 
                 val sens =
                     seq1.
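
The dict/dictBack change above is a simple interning pattern: the reverse map guarantees each note gets at most one label, so repeated notes reuse the same character instead of consuming a new one. A tiny sketch of the pattern (illustrative names, not the actual Scala code):

    import java.util.HashMap;
    import java.util.Map;

    class Interner<T> {
        private final Map<String, T> byLabel = new HashMap<>();
        private final Map<T, String> byValue = new HashMap<>();

        private char next = 'A';

        // Returns the existing label for 'v', or assigns a fresh one.
        String intern(T v) {
            String label = byValue.get(v);

            if (label == null) {
                label = String.valueOf(next++);

                byLabel.put(label, v);
                byValue.put(v, label);
            }

            return label;
        }
    }
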
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
index 017c10e..834735b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
@@ -19,7 +19,9 @@ package org.apache.nlpcraft.common.util;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.RecursiveTask;
@@ -42,30 +44,17 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
         this.wordCounts = wordCounts;
     }
 
-    public static <T> List<List<T>> findCombinations(List<List<T>> inp, Comparator<T> comparator, ForkJoinPool pool) {
-        List<List<T>> uniqueInp = inp.stream()
-            .filter(row -> inp.stream().noneMatch(it -> !it.equals(row)  && it.containsAll(row)))
-            .map(i -> i.stream().distinct().sorted(comparator).collect(toList()))
-            .collect(toList());
-
-
-        System.out.println("!!!");
-        for (List<T> ts : uniqueInp) {
-            System.out.println("!!!ts=");
-            System.out.println(ts.stream().map(Object::toString).collect(Collectors.joining("\n")));
-        }
-        System.out.println("!!!");
+    public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
+        List<List<T>> uniqueInp = inp;
 
         // Build dictionary of unique words.
         List<T> dict = uniqueInp.stream()
             .flatMap(Collection::stream)
             .distinct()
-            .sorted(comparator)
             .collect(toList());
 
-        System.out.println("dict=");
-        System.out.println(dict.stream().map(Object::toString).collect(Collectors.joining("\n")));
-        System.out.println();
+        System.out.println("inp=" + inp);
+        System.out.println("dict=" + dict);
 
         if (dict.size() > Long.SIZE) {
             // Note: Power set of 64 words results in 9223372036854775807 combinations.
@@ -87,7 +76,11 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
 
         NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
 
-        return pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+        final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+
+        System.out.println("res=" + res);
+
+        return res;
     }
 
     @Override
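
findCombinations now takes the caller's ForkJoinPool rather than creating its own, and the work itself is a RecursiveTask over a numeric range. A minimal sketch of that divide-and-conquer shape (a range sum, not the actual NCComboRecursiveTask internals):

    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.RecursiveTask;

    // Splits [lo, hi) in half until a chunk is small, then works sequentially.
    class RangeSum extends RecursiveTask<Long> {
        private final long lo, hi;

        RangeSum(long lo, long hi) { this.lo = lo; this.hi = hi; }

        @Override
        protected Long compute() {
            if (hi - lo <= 1_000) { // Sequential threshold.
                long sum = 0;
                for (long i = lo; i < hi; i++)
                    sum += i;
                return sum;
            }

            long mid = (lo + hi) >>> 1;
            RangeSum left = new RangeSum(lo, mid);

            left.fork();                                  // Left half runs asynchronously.
            long right = new RangeSum(mid, hi).compute(); // Right half runs in this thread.

            return right + left.join();
        }
    }

    class RangeSumDemo {
        public static void main(String[] args) {
            ForkJoinPool pool = new ForkJoinPool();  // The caller owns the pool...
            System.out.println(pool.invoke(new RangeSum(0, 1_000_000)));
            pool.shutdown();                         // ...and shuts it down.
        }
    }
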
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 9290d56..658feda 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -47,15 +47,15 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
     @Test
     def test(): Unit =
         runBatch(
-            _ ⇒ checkExists(
-                "tomorrow",
-                usr(text = "tomorrow", id = "x3")
-            ),
-            _ ⇒ checkExists(
-                "tomorrow yesterday",
-                usr(text = "tomorrow", id = "x3"),
-                usr(text = "yesterday", id = "x3")
-            ),
+//            _ ⇒ checkExists(
+//                "tomorrow",
+//                usr(text = "tomorrow", id = "x3")
+//            ),
+//            _ ⇒ checkExists(
+//                "tomorrow yesterday",
+//                usr(text = "tomorrow", id = "x3"),
+//                usr(text = "yesterday", id = "x3")
+//            ),
             _ ⇒ checkExists(
                 "y y",
                 usr(text = "y y", id = "y3")