Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/03/10 00:37:47 UTC

[incubator-nlpcraft] 04/17: WIP.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit b7ab66f52702ddd00f2b88219963154b833401c8
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Mar 4 16:05:48 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 290 +++++++++++----------
 .../model/NCEnricherNestedModelSpec4.scala         |  53 ++++
 .../probe/mgrs/nlp/enrichers/model/Test1.java      | 135 ++++++++++
 .../probe/mgrs/nlp/enrichers/model/Test2.java      | 114 ++++++++
 4 files changed, 453 insertions(+), 139 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 113e088..f530327 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -22,10 +22,9 @@ import org.apache.nlpcraft.common.NCE
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
 import org.apache.nlpcraft.model.NCModel
 
-import java.util
-import java.util.{List ⇒ JList}
 import java.io.{Serializable ⇒ JSerializable}
-import java.util.Collections
+import java.util
+import java.util.{Collections, List ⇒ JList}
 import scala.collection.JavaConverters._
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, Set, mutable}
@@ -42,8 +41,10 @@ object NCNlpSentence extends LazyLogging {
         require(start <= end)
 
         private def in(i: Int): Boolean = i >= start && i <= end
+
         def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
     }
+
     object PartKey {
         def apply(m: util.HashMap[String, JSerializable]): PartKey = {
             def get[T](name: String): T = m.get(name).asInstanceOf[T]
@@ -71,7 +72,7 @@ object NCNlpSentence extends LazyLogging {
                 noteLinks ++=
                     (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
                         yield NoteLink(name, idxs.sorted)
-                    )
+                        )
             }
 
             if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
@@ -409,7 +410,8 @@ object NCNlpSentence extends LazyLogging {
             "stopWord" → stop,
             "bracketed" → false,
             "direct" → direct,
-            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
+            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict")
+            else false),
             "english" → nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
             "swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
         )
@@ -456,7 +458,8 @@ object NCNlpSentence extends LazyLogging {
                     var fixed = idxs
 
                     history.foreach {
-                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct)
+                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew
+                        else i).distinct)
                     }
 
                     if (fixed.forall(_.size == 1))
@@ -467,6 +470,7 @@ object NCNlpSentence extends LazyLogging {
                         ok = false
                 case None ⇒ // No-op.
             }
+
         ok &&
             ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
                 rel.dataOpt[JList[Int]](idxsField) match {
@@ -518,9 +522,9 @@ object NCNlpSentence extends LazyLogging {
 
         val res =
             fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, history) &&
-            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
+                fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
+                fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
+                fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
 
         if (res) {
             // Validation (all indexes calculated well)
@@ -538,115 +542,40 @@ object NCNlpSentence extends LazyLogging {
 
         res
     }
-}
-
-import org.apache.nlpcraft.common.nlp.NCNlpSentence._
-
-/**
-  * Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
-  * each note is a collection of KV pairs.
-  *
-  * @param srvReqId Server request ID.
-  * @param text Normalized text.
-  * @param enabledBuiltInToks Enabled built-in tokens.
-  * @param tokens Initial buffer.
-  * @param deletedNotes Deleted overridden notes with their tokens.
-  */
-class NCNlpSentence(
-    val srvReqId: String,
-    val text: String,
-    val enabledBuiltInToks: Set[String],
-    override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
-    private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
-    private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
-    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
-) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
-    @transient
-    private var hash: java.lang.Integer = _
-
-    private def calcHash(): Int =
-        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
-
-    private def addDeleted(sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
-        sen.deletedNotes ++= dels.map(n ⇒ {
-            val savedDelNote = n.clone()
-            val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
-
-            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
-            // Deleted note's tokens should contains only nlp data and deleted notes.
-            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
-                savedDelTok.remove(mainNote)
-
-            savedDelNote → savedDelToks
-        })
-
-    // Deep copy.
-    override def clone(): NCNlpSentence =
-        new NCNlpSentence(
-            srvReqId,
-            text,
-            enabledBuiltInToks,
-            tokens.map(_.clone()),
-            deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
-            initNlpNotes = initNlpNotes
-        )
-
-    /**
-      * Utility method that gets set of notes for given note type collected from
-      * tokens in this sentence. Notes are sorted in the same order they appear
-      * in this sentence.
-      *
-      * @param noteType Note type.
-      */
-    def getNotes(noteType: String): Seq[NCNlpSentenceNote] = this.flatMap(_.getNotes(noteType)).distinct
-
-    /**
-      * Utility method that removes note with given ID from all tokens in this sentence.
-      * No-op if such note wasn't found.
-      *
-      * @param note Note.
-      */
-    def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
-
-    //noinspection HashCodeUsesVar
-    override def hashCode(): Int = {
-        if (hash == null)
-            hash = calcHash()
-
-        hash
-    }
-
-    def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
-        val fixed = note.clone(kvs: _*)
-
-        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
-            t.remove(note)
-            t.add(fixed)
-        })
-
-        hash = null
-    }
 
     private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
         if (!mdl.getAbstractTokens.isEmpty) {
             val notes = ns.flatten
 
-            val keys = getPartKeys(notes :_*)
+            val keys = getPartKeys(notes: _*)
             val noteLinks = getLinks(notes)
 
             notes.filter(n ⇒ {
                 val noteToks = ns.tokens.filter(_.contains(n))
 
                 mdl.getAbstractTokens.contains(n.noteType) &&
-                !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
-                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
+                    !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
+                    !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
             }).foreach(ns.removeNote)
         }
 
     private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
         toks.flatten.filter(!_.isNlp).distinct
 
+    private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
+        sen.deletedNotes ++= dels.map(n ⇒ {
+            val savedDelNote = n.clone()
+            val savedDelToks = n.tokenIndexes.map(idx ⇒ thisSen(idx).clone())
+
+            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
+
+            // Deleted note's tokens should contain only NLP data and deleted notes.
+            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+                savedDelTok.remove(mainNote)
+
+            savedDelNote → savedDelToks
+        })
+
     /**
       * This collapser handles several tasks:
       * - "overall" collapsing after all other individual collapsers had their turn.
@@ -656,12 +585,13 @@ class NCNlpSentence(
       * lengths - the winning note is chosen based on this priority.
       */
     @throws[NCE]
-    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
+    private def collapseSentence(thisSen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
         def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
             if (lastPhase)
                 dropAbstract(mdl, ns)
 
-            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns) else None
+            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns)
+            else None
         }
 
         // Always deletes `similar` notes.
@@ -669,37 +599,37 @@ class NCNlpSentence(
         // We keep only one variant - the one with the `best` direct and sparsity parameters;
         // other variants for these words are redundant.
         val redundant: Seq[NCNlpSentenceNote] =
-            this.flatten.filter(!_.isNlp).distinct.
-                groupBy(_.getKey()).
-                map(p ⇒ p._2.sortBy(p ⇒
-                    (
-                        // System notes don't have such flags.
-                        if (p.isUser) {
-                            if (p.isDirect)
-                                0
-                            else
-                                1
-                        }
-                        else
-                            0,
-                        if (p.isUser)
-                            p.sparsity
-                        else
+        thisSen.flatten.filter(!_.isNlp).distinct.
+            groupBy(_.getKey()).
+            map(p ⇒ p._2.sortBy(p ⇒
+                (
+                    // System notes don't have such flags.
+                    if (p.isUser) {
+                        if (p.isDirect)
                             0
-                    )
-                )).
-                flatMap(_.drop(1)).
-                toSeq
+                        else
+                            1
+                    }
+                    else
+                        0,
+                    if (p.isUser)
+                        p.sparsity
+                    else
+                        0
+                )
+            )).
+            flatMap(_.drop(1)).
+            toSeq
 
-        redundant.foreach(this.removeNote)
+        redundant.foreach(thisSen.removeNote)
 
         var delCombs: Seq[NCNlpSentenceNote] =
-            getNotNlpNotes(this).
-                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ this(i))).filter(_ != note)).
+            getNotNlpNotes(thisSen).
+                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
                 distinct
 
         // Optimization. Deletes all wholly swallowed notes.
-        val links = getLinks(this.flatten)
+        val links = getLinks(thisSen.flatten)
 
         val swallowed =
             delCombs.
@@ -709,22 +639,24 @@ class NCNlpSentence(
                 filter(getPartKeys(_).isEmpty).
                 flatMap(note ⇒ {
                     val noteWordsIdxs = note.wordIndexes.toSet
-                    val key = PartKey(note, this)
+                    val key = PartKey(note, thisSen)
 
                     val delCombOthers =
-                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
+                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n)
+                        else None)
 
-                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
+                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note)
+                    else None
                 })
 
         delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
-        addDeleted(this, swallowed)
-        swallowed.foreach(this.removeNote)
+        addDeleted(thisSen, thisSen, swallowed)
+        swallowed.foreach(thisSen.removeNote)
 
-        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
+        val toksByIdx: Seq[Set[NCNlpSentenceNote]] =
             delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                 groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
+                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                 toSeq.sortBy(-_.size)
 
         val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
@@ -741,7 +673,7 @@ class NCNlpSentence(
                                     !toksByIdx.exists(
                                         rec ⇒
                                             rec.size - delCombs.size <= 1 &&
-                                            rec.count(note ⇒ !delComb.contains(note)) > 1
+                                                rec.count(note ⇒ !delComb.contains(note)) > 1
                                     )
                                 )
                         ).
@@ -750,10 +682,10 @@ class NCNlpSentence(
                         flatMap(delComb ⇒
                             // Already processed with less subset of same deleted tokens.
                             if (!deleted.exists(_.subsetOf(delComb))) {
-                                val nsClone = this.clone()
+                                val nsClone = thisSen.clone()
 
                                 // Saves deleted notes for sentence and their tokens.
-                                addDeleted(nsClone, delComb)
+                                addDeleted(thisSen, nsClone, delComb)
                                 delComb.foreach(nsClone.removeNote)
 
                                 // Has overlapped notes for some tokens.
@@ -787,7 +719,8 @@ class NCNlpSentence(
                             p.clone().filter(_._1 != "direct")
                         )
 
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
+                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0
+                    else 1).sum)
                 }).
                     foreach { case (key, sen, directCnt) ⇒
                         m.get(key) match {
@@ -802,7 +735,7 @@ class NCNlpSentence(
                 m.values.map(_.sentence).toSeq
             }
             else
-                collapse0(this).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
+                collapse0(thisSen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
 
         sens = sens.distinct
 
@@ -822,6 +755,81 @@ class NCNlpSentence(
             map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
             toSeq
     }
+}
+
+import org.apache.nlpcraft.common.nlp.NCNlpSentence._
+
+/**
+  * Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
+  * each note is a collection of KV pairs.
+  *
+  * @param srvReqId Server request ID.
+  * @param text Normalized text.
+  * @param enabledBuiltInToks Enabled built-in tokens.
+  * @param tokens Initial buffer.
+  * @param deletedNotes Deleted overridden notes with their tokens.
+  */
+class NCNlpSentence(
+    val srvReqId: String,
+    val text: String,
+    val enabledBuiltInToks: Set[String],
+    override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
+    private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
+    private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
+    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
+) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
+    @transient
+    private var hash: java.lang.Integer = _
+
+    private def calcHash(): Int =
+        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+
+    // Deep copy.
+    override def clone(): NCNlpSentence =
+        new NCNlpSentence(
+            srvReqId,
+            text,
+            enabledBuiltInToks,
+            tokens.map(_.clone()),
+            deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
+            initNlpNotes = initNlpNotes
+        )
+
+    /**
+      * Utility method that gets set of notes for given note type collected from
+      * tokens in this sentence. Notes are sorted in the same order they appear
+      * in this sentence.
+      *
+      * @param noteType Note type.
+      */
+    def getNotes(noteType: String): Seq[NCNlpSentenceNote] = this.flatMap(_.getNotes(noteType)).distinct
+
+    /**
+      * Utility method that removes note with given ID from all tokens in this sentence.
+      * No-op if such note wasn't found.
+      *
+      * @param note Note.
+      */
+    def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
+
+    //noinspection HashCodeUsesVar
+    override def hashCode(): Int = {
+        if (hash == null)
+            hash = calcHash()
+
+        hash
+    }
+
+    def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
+        val fixed = note.clone(kvs: _*)
+
+        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
+            t.remove(note)
+            t.add(fixed)
+        })
+
+        hash = null
+    }
 
     /**
      * Returns a flag indicating whether two notes are equal (or similar). Differences caused by stopword tokens are ignored.
@@ -931,4 +939,8 @@ class NCNlpSentence(
       *
       */
     def getDeletedNotes: Predef.Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = deletedNotes.toMap
+
+    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
+        collapseSentence(this, mdl, lastPhase)
+    }
 }
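
Note on the change above: `toksByIdx` now collects, for every word index, the set of candidate delete notes covering that index (previously a `Seq`), ordered by descending bucket size. A standalone sketch of that grouping, using a hypothetical simplified `Note` type for illustration only:

    // Sketch only - `Note` is a simplified stand-in for NCNlpSentenceNote.
    object ToksByIdxSketch extends App {
        case class Note(id: String, wordIndexes: Seq[Int])

        // Notes "a" and "b" overlap on word index 1.
        val delCombs = Seq(Note("a", Seq(0, 1)), Note("b", Seq(1)), Note("c", Seq(2)))

        // Bucket notes by word index, de-duplicate via Set, sort by descending bucket size.
        val toksByIdx: Seq[Set[Note]] =
            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                groupBy { case (idx, _) ⇒ idx }.
                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                toSeq.sortBy(-_.size)

        println(toksByIdx) // The two-note bucket for index 1 comes first.
    }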
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
new file mode 100644
index 0000000..afdeaab
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+
+/**
+  * Nested Elements test model.
+  */
+class NCNestedTestModel4 extends NCModelAdapter(
+    "nlpcraft.nested3.test.mdl", "Nested Data Test Model", "1.0"
+) {
+    override def getElements: util.Set[NCElement] =
+        Set(
+            NCTestElement("e1", "//[a-zA-Z0-9]+//"),
+            NCTestElement("e2", "the ^^(id == 'e1')^^")
+        )
+
+    override def getAbstractTokens: util.Set[String] = Set("e1").asJava
+    override def getEnabledBuiltInTokens: util.Set[String] = Set.empty[String].asJava
+
+    @NCIntent("intent=onE2 term(t1)={id == 'e2'}[8, 100]")
+    def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+}
+
+/**
+  * The test run shouldn't take too long.
+  */
+@NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
+class NCEnricherNestedModelSpec4 extends NCTestContext {
+    @Test
+    def test(): Unit = checkIntent("the a " * 8, "onE2")
+}
\ No newline at end of file
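
The spec above feeds the model eight repetitions of "the a": each fragment can resolve to one `e2` element ("the" followed by an `e1` regex match on "a"), which is what the intent's `[8, 100]` quantifier needs. A quick sanity check of the input expansion (standard Scala string repetition, illustration only):

    object InputSketch extends App {
        // "the a " * 8 repeats the fragment eight times - eight potential e2 matches.
        val input = "the a " * 8

        assert(input.split(" ").count(_ == "the") == 8)
    }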
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java
new file mode 100644
index 0000000..398e858
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test1.java
@@ -0,0 +1,135 @@
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model;
+
+import com.google.common.collect.ImmutableSet;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public class Test1 {
+    private static List<Set<String>> ROWS =
+        Arrays.asList(
+            ImmutableSet.of("A", "B", "C"),
+            ImmutableSet.of("B", "C", "D"),
+            ImmutableSet.of("B", "D")
+        );
+
+//    // Uncomment to try: it runs for a very long time. The normalized result size is 256.
+//    private static List<Set<String>> ROWS = Arrays.asList(
+//        ImmutableSet.of("A", "B"),
+//        ImmutableSet.of("C", "B"),
+//        ImmutableSet.of("D", "E"),
+//        ImmutableSet.of("D", "F"),
+//        ImmutableSet.of("G", "H"),
+//        ImmutableSet.of("I", "H"),
+//        ImmutableSet.of("J", "K"),
+//        ImmutableSet.of("L", "K"),
+//        ImmutableSet.of("M", "N"),
+//        ImmutableSet.of("M", "O"),
+//        ImmutableSet.of("P", "Q"),
+//        ImmutableSet.of("P", "R"),
+//        ImmutableSet.of("S", "T"),
+//        ImmutableSet.of("S", "U"),
+//        ImmutableSet.of("V", "W"),
+//        ImmutableSet.of("X", "W")
+//    );
+
+    private static Set<String> ALL = ROWS.stream().flatMap(Collection::stream).collect(Collectors.toSet());
+
+    // Goal: find the minimal set of combinations with the following property:
+    // after removing a combination's values from each row, the list should contain only rows of size <= 1.
+
+    // Expected solution: [C, B], [A, C, D], [A, B, D]
+    // Example:
+    // list - [C, B] = {{A}, {D}, {D}}
+    // list - [A, C, D] = {{B}, {B}, {B}}
+    // list - [A, B, D] = {{C}, {C}, {}}
+
+
+    // Additionally, redundant solutions are excluded: [A, B, C] ([C, B] is enough), [A, B, C, D] ([A, C, D] is enough), etc.
+
+    // Simplest approach: enumerate all single-or-empty row variants via a Cartesian product.
+    public static void main(String[] args) {
+        long t = System.currentTimeMillis();
+
+        System.out.println("1. start [time=" + (System.currentTimeMillis() - t) + ']');
+
+        // 1. Extend each row with an empty-choice marker.
+        List<Set<String>> extRows = extendNulls();
+
+        // 2. All single-choice row variants (Cartesian product).
+        // Alternatively, build the product manually, e.g. https://stackoverflow.com/questions/17192796/generate-all-combinations-from-multiple-lists
+        Set<List<String>> allSingleOrNullRows = com.google.common.collect.Sets.cartesianProduct(extRows);
+
+        System.out.println("2. permuted [size=" + allSingleOrNullRows.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
+
+        // 3. Collects all suitable combinations.
+        Set<Set<String>> combs =
+            allSingleOrNullRows.
+            stream().
+            // Computes which values must be removed to reduce this row to a single or empty choice
+            // (that removal set is the candidate combination).
+            map(row -> {
+                Set<String> copy = new HashSet<>(ALL);
+
+                copy.removeAll(row);
+
+                return copy;
+            }).
+            distinct().
+            filter(Test1::isSuitable).
+            collect(Collectors.toSet());
+
+        System.out.println("3. calculated [size=" + combs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
+
+        // 4. Normalize variants (keep only minimal valid subsets, see the goal above).
+        Set<Set<String>> normCombs = squeeze(combs);
+
+        System.out.println("4. normalized [size=" + normCombs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
+        System.out.println("Norm results:" + normCombs);
+    }
+
+    /**
+     * Removes `candidate` from each row of ROWS.
+     * Returns true if the resulting list doesn't contain any row with size > 1.
+     * <p>
+     * E.g. if ROWS is {{a, b}, {a, c}}: candidate {a, b} - ok, candidate {a} - ok, candidate {b} - no.
+     */
+    private static boolean isSuitable(Set<String> candidate) {
+        for (Set<String> row : ROWS) {
+            Set<String> copy = new HashSet<>(row);
+
+            copy.removeAll(candidate);
+
+            if (copy.size() > 1) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    private static Set<Set<String>> squeeze(Set<Set<String>> combs) {
+        Set<Set<String>> normCombs = new HashSet<>();
+
+        for (Set<String> comb : combs.stream().sorted(Comparator.comparingInt(Set::size)).collect(Collectors.toList())) {
+            // Skip combinations that contain an already-added smaller one.
+            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
+                normCombs.add(comb);
+            }
+        }
+        return normCombs;
+    }
+
+    // Adds "" (an empty-choice marker) to each row. For the small ROWS it returns {{A, B, C, ""}, {B, C, D, ""}, {B, D, ""}}.
+    private static List<Set<String>> extendNulls() {
+        return ROWS.stream().map(
+            p -> Stream.concat(p.stream(), Stream.of("")).collect(Collectors.toSet())
+        ).collect(Collectors.toList());
+    }
+}
+
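
The core predicate in Test1 is `isSuitable`: a candidate combination qualifies when removing its values leaves every row with at most one element. A compact Scala restatement against the small ROWS input (illustration only, not part of the commit):

    object SuitableSketch extends App {
        val rows = Seq(Set("A", "B", "C"), Set("B", "C", "D"), Set("B", "D"))

        // A candidate qualifies if removing it leaves every row with <= 1 element.
        def isSuitable(candidate: Set[String]): Boolean =
            rows.forall(row ⇒ (row -- candidate).size <= 1)

        println(isSuitable(Set("C", "B")))      // true:  leaves {A}, {D}, {D}
        println(isSuitable(Set("A", "C", "D"))) // true:  leaves {B}, {B}, {B}
        println(isSuitable(Set("B")))           // false: leaves {A, C}, {C, D}, {D}
    }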
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java
new file mode 100644
index 0000000..b8e5e42
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/Test2.java
@@ -0,0 +1,114 @@
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class Test2 {
+//    private static List<Set<String>> ROWS =
+//        Arrays.asList(
+//            ImmutableSet.of("A", "B", "C"),
+//            ImmutableSet.of("B", "C", "D"),
+//            ImmutableSet.of("B", "D")
+//        );
+
+    // The large input (active here): runs for a very long time. The normalized result size is 256.
+    private static List<Set<String>> ROWS = Arrays.asList(
+        ImmutableSet.of("A", "B"),
+        ImmutableSet.of("C", "B"),
+        ImmutableSet.of("D", "E"),
+        ImmutableSet.of("D", "F"),
+        ImmutableSet.of("G", "H"),
+        ImmutableSet.of("I", "H"),
+        ImmutableSet.of("J", "K"),
+        ImmutableSet.of("L", "K"),
+        ImmutableSet.of("M", "N"),
+        ImmutableSet.of("M", "O"),
+        ImmutableSet.of("P", "Q"),
+        ImmutableSet.of("P", "R"),
+        ImmutableSet.of("S", "T"),
+        ImmutableSet.of("S", "U"),
+        ImmutableSet.of("V", "W"),
+        ImmutableSet.of("X", "W")
+    );
+
+    private static Set<String> ALL = ROWS.stream().flatMap(Collection::stream).collect(Collectors.toSet());
+
+    // Goal: find the minimal set of combinations with the following property:
+    // after removing a combination's values from each row, the list should contain only rows of size <= 1.
+
+    // Expected solution: [C, B], [A, C, D], [A, B, D]
+    // Example:
+    // list - [C, B] = {{A}, {D}, {D}}
+    // list - [A, C, D] = {{B}, {B}, {B}}
+    // list - [A, B, D] = {{C}, {C}, {}}
+
+
+    // Additionally, redundant solutions are excluded: [A, B, C] ([C, B] is enough), [A, B, C, D] ([A, C, D] is enough), etc.
+
+    // Simplest approach: enumerate all subsets of ALL by increasing size and filter the suitable ones.
+    public static void main(String[] args) {
+        long t = System.currentTimeMillis();
+
+        System.out.println("1. start [time=" + (System.currentTimeMillis() - t) + ']');
+
+        Set<Set<String>> combs = new HashSet<>();
+
+        for (int i = 1; i < ALL.size(); i++) {
+            combs.addAll(
+                Sets.combinations(ALL, i).
+                    stream().
+                    filter(Test2::isSuitable).
+                    collect(Collectors.toSet())
+            );
+        }
+
+        System.out.println("2. calculated [size=" + combs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
+
+        // Normalize variants (keep only minimal valid subsets, see the goal above).
+        Set<Set<String>> normCombs = squeeze(combs);
+
+        System.out.println("3. normalized [size=" + normCombs.size() + ", time=" + (System.currentTimeMillis() - t) + ']');
+        System.out.println("Norm results:" + normCombs);
+    }
+
+    private static Set<Set<String>> squeeze(Set<Set<String>> combs) {
+        Set<Set<String>> normCombs = new HashSet<>();
+
+        for (Set<String> comb : combs.stream().sorted(Comparator.comparingInt(Set::size)).collect(Collectors.toList())) {
+            // Skip combinations that contain an already-added smaller one.
+            if (normCombs.stream().filter(comb::containsAll).findAny().isEmpty()) {
+                normCombs.add(comb);
+            }
+        }
+        return normCombs;
+    }
+
+    /**
+     * Removes `candidate` from each row of ROWS.
+     * Returns true if the resulting list doesn't contain any row with size > 1.
+     * <p>
+     * E.g. if ROWS is {{a, b}, {a, c}}: candidate {a, b} - ok, candidate {a} - ok, candidate {b} - no.
+     */
+    private static boolean isSuitable(Set<String> candidate) {
+        for (Set<String> row : ROWS) {
+            Set<String> copy = new HashSet<>(row);
+
+            copy.removeAll(candidate);
+
+            if (copy.size() > 1) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
+
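
Test1 and Test2 share the `squeeze` normalization: candidates are processed smallest-first and kept only if no already-kept candidate is a subset of them. A functional Scala restatement (illustration only, not part of the commit):

    object SqueezeSketch extends App {
        val combs = Set(Set("C", "B"), Set("A", "B", "C"), Set("A", "C", "D"))

        // Process smallest-first; keep a combination only if no kept one is its subset.
        val normCombs = combs.toSeq.sortBy(_.size).foldLeft(Seq.empty[Set[String]]) {
            (acc, comb) ⇒ if (acc.exists(_.subsetOf(comb))) acc else acc :+ comb
        }

        println(normCombs) // Seq(Set(C, B), Set(A, C, D)) - [A, B, C] is dropped as redundant.
    }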