Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/28 12:22:12 UTC

[incubator-nlpcraft] branch NLPCRAFT-30 updated (b235d2d -> b9621eb)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from b235d2d  Merge branch 'master' into NLPCRAFT-30
     new 06403d3  WIP.
     new b9621eb  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/main/resources/nlpcraft.conf                   |   3 +-
 .../nlpcraft/examples/sql/SqlModelTest.scala       |  17 +-
 .../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala  |  21 +-
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala |  18 +-
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 254 ++++++++++-----------
 .../enrichers/relation/NCRelationEnricher.scala    |  26 ++-
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   |  48 ++--
 .../mgrs/nlp/enrichers/NCDefaultTestModel.scala    |   2 +
 .../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala    |   2 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    |   7 +-
 10 files changed, 205 insertions(+), 193 deletions(-)


[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit b9621eb53be7ebe23146551e0ee54c1366af314f
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Apr 28 15:19:59 2020 +0300

    WIP.
---
 .../nlpcraft/examples/sql/SqlModelTest.scala       | 17 +++-------
 .../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala  | 21 +++++++++++++
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 13 ++++++--
 .../enrichers/relation/NCRelationEnricher.scala    | 22 +++++++++++--
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 36 +++++++++++-----------
 .../mgrs/nlp/enrichers/NCDefaultTestModel.scala    |  2 ++
 .../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala    |  2 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    |  7 ++++-
 8 files changed, 82 insertions(+), 38 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index eb60c77..fe7851e 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -18,7 +18,6 @@
 package org.apache.nlpcraft.examples.sql
 
 import java.util
-import java.util.function.Function
 
 import com.github.difflib.text.DiffRowGenerator
 import com.github.vertical_blank.sqlformatter.SqlFormatter
@@ -44,12 +43,8 @@ class SqlModelTest {
         DiffRowGenerator.create.
             showInlineDiffs(true).
             inlineDiffByWord(true).
-            oldTag(new Function[java.lang.Boolean, String]() {
-                override def apply(t: java.lang.Boolean): String = "~"
-            }).
-            newTag(new Function[java.lang.Boolean, String]() {
-                override def apply(t: java.lang.Boolean): String = "**"
-            }).
+            oldTag((_: java.lang.Boolean) ⇒ "~").
+            newTag((_: java.lang.Boolean) ⇒ "**").
             build
 
     private var client: NCTestClient = _
@@ -91,11 +86,7 @@ class SqlModelTest {
         val errs = collection.mutable.LinkedHashMap.empty[String, String]
 
         cases.
-            flatMap(c ⇒ {
-                val sql = normalize(c.sql)
-
-                c.texts.map(t ⇒ t → sql)
-            }).
+            flatMap(c ⇒ c.texts.map(t ⇒ t → normalize(c.sql))).
             foreach {
                 case (txt, expSqlNorm) ⇒
                     val res = client.ask(txt)
@@ -148,7 +139,7 @@ class SqlModelTest {
             }
 
         if (errs.nonEmpty) {
-            errs.foreach { case (txt, err) ⇒ println(s"Text: $txt\nError: $err\n") }
+            errs.foreach { case (txt, err) ⇒ System.err.println(s"Text: $txt\nError: $err\n") }
 
             throw new Exception(s"Test finished with errors [passed=${cases.size - errs.size}, failed=${errs.size}]")
         }
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
index 4f762c9..dda8875 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
@@ -33,6 +33,27 @@ import scala.language.implicitConversions
  */
 abstract class NCProbeEnricher extends NCService with LazyLogging {
     /**
+      * Checks whether important tokens deleted as stopwords or not.
+      *
+      * @param ns Sentence.
+      * @param toks Tokens in which some stopwords can be deleted.
+      * @param isImportant Token important criteria.
+      */
+    protected def validImportant(
+        ns: NCNlpSentence,
+        toks: Seq[NCNlpSentenceToken],
+        isImportant: NCNlpSentenceToken ⇒ Boolean
+    ): Boolean = {
+        val idxs = toks.map(_.index)
+
+        require(idxs == idxs.sorted)
+
+        val toks2 = ns.slice(idxs.head, idxs.last + 1)
+
+        toks.length == toks2.length || toks.count(isImportant) == toks2.count(isImportant)
+    }
+
+    /**
       *
       * @param toks
       * @param pred
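
Note on the hunk above: the new validImportant guard accepts a candidate token span only if either nothing was skipped between its first and last indexes, or everything skipped is unimportant to the enricher (e.g. not a user token or a technique-specific keyword). Below is a minimal, self-contained sketch of that idea using a hypothetical simplified Token type, not the actual NCNlpSentence/NCNlpSentenceToken API:

// Hypothetical, simplified sketch of the validImportant guard; the real method
// works on NCNlpSentence/NCNlpSentenceToken instead of this toy Token type.
object ValidImportantSketch {
    final case class Token(index: Int, isStopWord: Boolean, isImportant: Boolean)

    def validImportant(sentence: IndexedSeq[Token], span: Seq[Token]): Boolean = {
        val idxs = span.map(_.index)

        require(idxs == idxs.sorted, "Span tokens must be in sentence order.")

        // Full slice of the sentence covered by the span, including skipped stopwords.
        val slice = sentence.slice(idxs.head, idxs.last + 1)

        // Accept if nothing was skipped, or if everything skipped was unimportant.
        span.length == slice.length || span.count(_.isImportant) == slice.count(_.isImportant)
    }

    def main(args: Array[String]): Unit = {
        val sent = IndexedSeq(
            Token(0, isStopWord = false, isImportant = true),  // "sort"
            Token(1, isStopWord = true,  isImportant = false), // "the"
            Token(2, isStopWord = false, isImportant = true)   // "users"
        )

        // The span skips only the stopword, so no important token is lost.
        println(validImportant(sent, Seq(sent(0), sent(2)))) // true
    }
}
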
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index b572315..69500a8 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -150,6 +150,7 @@ object NCLimitEnricher extends NCProbeEnricher {
         s"$CD of",
         s"$CD <POST_WORDS>",
         s"<POST_WORDS> $CD"
+
     )
 
     private final val LIMITS: Seq[String] = {
@@ -164,6 +165,8 @@ object NCLimitEnricher extends NCProbeEnricher {
         SYNONYMS.flatMap(parser.expand).distinct
     }
 
+    private final val TECH_WORDS = (SORT_WORDS.keys ++ TOP_WORDS ++ POST_WORDS ++ FUZZY_NUMS.keySet).toSet
+
     /**
       * Stemmatizes map's keys.
       *
@@ -190,10 +193,13 @@ object NCLimitEnricher extends NCProbeEnricher {
             "txt" → ns.text) { _ ⇒
             val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
             val groupsMap = groupNums(ns, numsMap.values)
+            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || TECH_WORDS.contains(t.stem)
 
             // Tries to grab tokens reverse way.
             // Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
-            for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)))
+            for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index))
+                 if validImportant(ns, toks, isImportant)
+            )
                 tryToMatch(numsMap, groupsMap, toks) match {
                     case Some(m) ⇒
                         for (refNote ← m.refNotes) {
@@ -224,7 +230,10 @@ object NCLimitEnricher extends NCProbeEnricher {
         groupsMap: Map[Seq[NCNlpSentenceToken], GroupsHolder],
         toks: Seq[NCNlpSentenceToken]
     ): Option[Match] = {
-        val refCands = toks.filter(_.exists(_.isUser))
+        val i1 = toks.head.index
+        val i2 = toks.last.index
+
+        val refCands = toks.filter(_.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
         val commonNotes = getCommonNotes(refCands)
 
         if (commonNotes.nonEmpty) {
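
Note on the hunk above: a token now counts as a reference candidate only if the user note it carries lies entirely inside the candidate span [i1, i2]. A minimal sketch of that containment check on a hypothetical simplified type (not the real NLPCraft note API):

// Hypothetical, simplified containment check: a note is a valid reference only
// if all of its token indexes fall inside the candidate span [i1, i2].
object SpanContainmentSketch {
    final case class Note(isUser: Boolean, tokenIndexes: Seq[Int])

    def refCandidate(note: Note, i1: Int, i2: Int): Boolean =
        note.isUser && note.tokenIndexes.head >= i1 && note.tokenIndexes.last <= i2

    def main(args: Array[String]): Unit = {
        // A user note spanning tokens 2..4 qualifies for span [1, 5] but not for [3, 5].
        val note = Note(isUser = true, tokenIndexes = Seq(2, 3, 4))

        println(refCandidate(note, 1, 5)) // true
        println(refCandidate(note, 3, 5)) // false
    }
}
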
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 05861e1..dab01e9 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -60,6 +60,7 @@ object NCRelationEnricher extends NCProbeEnricher {
     )
 
     private var FUNCS: Seq[Holder] = _
+    private var ALL_FUNC_STEMS: Set[String] = _
 
     /**
       * Starts this component.
@@ -108,6 +109,8 @@ object NCRelationEnricher extends NCProbeEnricher {
             seq.sortBy(-_.allStems.size)
         }
 
+        ALL_FUNC_STEMS = FUNCS.flatMap(_.allStems).toSet
+
         super.start()
     }
 
@@ -123,10 +126,13 @@ object NCRelationEnricher extends NCProbeEnricher {
             "txt" → ns.text) { _ ⇒
             // Tries to grab tokens direct way.
             // Example: A, B, C ⇒ ABC, AB, BC .. (AB will be processed first)
-            for (toks ← ns.tokenMixWithStopWords())
+
+            def isImportant(t: NCNlpSentenceToken): Boolean =
+                t.exists(n ⇒ n.isUser || REL_TYPES.contains(n.noteType)) || ALL_FUNC_STEMS.contains(t.stem)
+
+            for (toks ← ns.tokenMixWithStopWords() if validImportant(ns, toks, isImportant))
                 tryToMatch(toks) match {
                     case Some(m) ⇒
-                        //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, Seq(m.matched.head))) {
                         for (refNote ← m.refNotes) {
                             val note = NCNlpSentenceNote(
                                 Seq(m.matchedHead.index),
@@ -186,7 +192,17 @@ object NCRelationEnricher extends NCProbeEnricher {
       * @param toks
       */
     private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
-        var refOpts = toks.filter(t ⇒ t.exists(n ⇒ n.isUser || REL_TYPES.contains(n.noteType)))
+        val i1 = toks.head.index
+        val i2 = toks.last.index
+
+        var refOpts = toks.
+            filter(t ⇒
+                t.exists(n ⇒ (
+                    n.isUser || REL_TYPES.contains(n.noteType)) &&
+                    n.tokenIndexes.head >= i1 &&
+                    n.tokenIndexes.last <= i2
+                )
+            )
         val matchOpts = toks.diff(refOpts)
 
         if (refOpts.nonEmpty && matchOpts.nonEmpty)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 96e6b30..3cf17dc 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -228,7 +228,7 @@ object NCSortEnricher extends NCProbeEnricher {
                 Seq.empty
 
         if (res.isEmpty && !nullable)
-            throw new AssertionError(s"Invalid null result " +
+            throw new AssertionError(s"Invalid empty result " +
                 s"[tokensTexts=[${toks.map(_.origText).mkString("|")}]" +
                 s", notes=[${toks.flatten.map(n ⇒ s"${n.noteType}:[${n.tokenIndexes.mkString(",")}]").mkString("|")}]" +
                 s", tokensIndexes=[${toks.map(_.index).mkString("|")}]" +
@@ -253,19 +253,15 @@ object NCSortEnricher extends NCProbeEnricher {
         def extract(keyStems: Seq[String], used: Seq[NCNlpSentenceToken]): Option[KeyWord] = {
             require(keyStems.nonEmpty)
 
-            if (toks.nonEmpty) {
-                val maxWords = keyStems.map(_.count(_ == ' ')).max + 1
+            val maxWords = keyStems.map(_.count(_ == ' ')).max + 1
 
-                (1 to maxWords).reverse.flatMap(i ⇒
-                    toks.sliding(i).filter(toks ⇒ used.intersect(toks).isEmpty).
-                        map(toks ⇒ toks.map(_.stem).mkString(" ") → toks).toMap.
-                        flatMap { case (stem, stemToks) ⇒
-                            if (keyStems.contains(stem)) Some(KeyWord(stemToks, keyStems.indexOf(stem))) else None
-                        }.toStream.headOption
-                ).toStream.headOption
-            }
-            else
-                None
+            (1 to maxWords).reverse.flatMap(i ⇒
+                toks.sliding(i).filter(toks ⇒ used.intersect(toks).isEmpty).
+                    map(toks ⇒ toks.map(_.stem).mkString(" ") → toks).toMap.
+                    flatMap { case (stem, stemToks) ⇒
+                        if (keyStems.contains(stem)) Some(KeyWord(stemToks, keyStems.indexOf(stem))) else None
+                    }.toStream.headOption
+            ).toStream.headOption
         }
 
         var res: Option[Match] = None
@@ -298,7 +294,12 @@ object NCSortEnricher extends NCProbeEnricher {
             val others = toks.filter(t ⇒ !all.contains(t))
 
             if (others.nonEmpty) {
-                val othersRefs = others.filter(_.exists(_.isUser))
+                val i1 = others.head.index
+                val i2 = others.last.index
+
+                val othersRefs = others.filter(
+                    t ⇒ t.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
+                )
 
                 if (
                     othersRefs.nonEmpty &&
@@ -410,9 +411,9 @@ object NCSortEnricher extends NCProbeEnricher {
             "srvReqId" → ns.srvReqId,
             "modelId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
-            val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
+            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || MASK_WORDS.contains(t.stem)
 
-            for (toks ← ns.tokenMixWithStopWords())
+            for (toks ← ns.tokenMixWithStopWords() if validImportant(ns, toks, isImportant)) {
                 tryToMatch(toks) match {
                     case Some(m) ⇒
                         def addNotes(
@@ -432,8 +433,6 @@ object NCSortEnricher extends NCProbeEnricher {
 
                             m.main.foreach(_.add(note))
                             m.stop.foreach(_.addStopReason(note))
-
-                            buf += toks.toSet
                         }
 
                         def mkParams(): mutable.ArrayBuffer[(String, Any)] = {
@@ -465,6 +464,7 @@ object NCSortEnricher extends NCProbeEnricher {
 
                     case None ⇒ // No-op.
                 }
+            }
         }
 
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
index 4048005..8deb578 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
@@ -65,6 +65,8 @@ class NCDefaultTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0
         NCResult.text(
             NCTestSentence.serialize(ctx.getVariants.asScala.map(v ⇒ NCTestSentence(v.asScala.map(NCTestToken(_)))))
         )
+
+    final override def getId: String = ID
 }
 
 object NCDefaultTestModel {
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
index f919f49..cd436ad 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -37,7 +37,7 @@ class NCEnricherBaseSpec {
 
         getModelClass match {
             case Some(claxx) ⇒
-                println(s"Embedded probe is going to start with model: $mdlId")
+                println(s"Embedded probe is going to start with model [id=$mdlId, claxx=${claxx.getName}]")
 
                 NCEmbeddedProbe.start(claxx)
             case None ⇒
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 187e0d8..c3afde1 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -18,7 +18,7 @@
 package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
 
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCTestSortTokenType._
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestNlpToken => nlp, NCTestSortToken => srt, NCTestUserToken => usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestNlpToken ⇒ nlp, NCTestSortToken ⇒ srt, NCTestUserToken ⇒ usr}
 import org.junit.jupiter.api.Test
 
 /**
@@ -146,6 +146,11 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
                     usr(text = "A", id = "A"),
                     usr(text = "B", id = "B"),
                     srt(text = "classify", subjNotes = Seq("A", "B"), subjIndexes = Seq(0, 1))
+                ),
+                Seq(
+                    usr(text = "A", id = "A"),
+                    usr(text = "B", id = "B"),
+                    srt(text = "classify", subjNotes = Seq("B"), subjIndexes = Seq(1))
                 )
             ),
             _ ⇒ checkAll(


[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 06403d3328262de82ab2bf7af7f1a134283b8087
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Apr 28 11:26:07 2020 +0300

    WIP.
---
 src/main/resources/nlpcraft.conf                   |   3 +-
 .../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala  |   8 -
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala |   7 +-
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 254 ++++++++++-----------
 .../enrichers/relation/NCRelationEnricher.scala    |   6 +-
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   |  14 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    |   2 +-
 7 files changed, 131 insertions(+), 163 deletions(-)

diff --git a/src/main/resources/nlpcraft.conf b/src/main/resources/nlpcraft.conf
index b9d2bd0..5d3917c 100644
--- a/src/main/resources/nlpcraft.conf
+++ b/src/main/resources/nlpcraft.conf
@@ -234,7 +234,8 @@ nlpcraft {
         # NOTE: These is an example.
         #       Start data probe with this configuration file to deploy example data model.
         models = [
-            "org.apache.nlpcraft.examples.echo.EchoModel"
+            "org.apache.nlpcraft.examples.echo.EchoModel",
+            "org.apache.nlpcraft.examples.sql.SqlModel"
 
             // Requires 'google' on the server side.
             // See https://nlpcraft.apache.org/integrations.html#nlp for more details
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
index a0fcb06..4f762c9 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
@@ -34,14 +34,6 @@ import scala.language.implicitConversions
 abstract class NCProbeEnricher extends NCService with LazyLogging {
     /**
       *
-      * @param buf
-      * @param toks
-      */
-    protected def areSuitableTokens(buf: Seq[Set[NCNlpSentenceToken]], toks: Seq[NCNlpSentenceToken]): Boolean =
-        !buf.exists(_.exists(toks.contains)) && toks.forall(t ⇒ !t.isQuoted && !t.isBracketed)
-
-    /**
-      *
       * @param toks
       * @param pred
       */
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 4b2648f..b572315 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -191,14 +191,11 @@ object NCLimitEnricher extends NCProbeEnricher {
             val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
             val groupsMap = groupNums(ns, numsMap.values)
 
-            val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
-
             // Tries to grab tokens reverse way.
             // Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
-            for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)) if areSuitableTokens(buf, toks))
+            for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)))
                 tryToMatch(numsMap, groupsMap, toks) match {
                     case Some(m) ⇒
-                        //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
                         for (refNote ← m.refNotes) {
                             val params = mutable.ArrayBuffer.empty[(String, Any)]
 
@@ -212,8 +209,6 @@ object NCLimitEnricher extends NCProbeEnricher {
                             val note = NCNlpSentenceNote(m.matched.map(_.index), TOK_ID, params: _*)
 
                             m.matched.foreach(_.add(note))
-
-                            buf += toks.toSet
                         }
                     case None ⇒ // No-op.
                 }
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index cfeeb7e..86b36ad 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -51,142 +51,123 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
 
     /**
       *
-      * @param p
+      * @param note
+      * @param withIndexes
       * @return
       */
-    private def getParameters(p: NCNlpSentenceNote): Any =
-        if (p.isUser)
-            (p.wordIndexes, p.noteType)
-        else {
-            p.noteType match {
-                case "nlpcraft:continent" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("continent")
-                    )
-                case "nlpcraft:subcontinent" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("continent"),
-                        p.get("subcontinent")
-                    )
-                case "nlpcraft:country" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("continent"),
-                        p.get("subcontinent"),
-                        p.get("country")
-                    )
-                case "nlpcraft:region" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("continent"),
-                        p.get("subcontinent"),
-                        p.get("country"),
-                        p.get("region")
-                    )
-                case "nlpcraft:city" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("continent"),
-                        p.get("subcontinent"),
-                        p.get("country"),
-                        p.get("region"),
-                        p.get("city")
-                    )
-                case "nlpcraft:metro" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("metro")
-                    )
-                case "nlpcraft:date" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("from"),
-                        p.get("to")
-                    )
-                case "nlpcraft:relation" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("type"),
-                        p.get("indexes"),
-                        p.get("note")
-                    )
-                case "nlpcraft:sort" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("subjnotes"),
-                        p.get("subjindexes"),
-                        p.getOrElse("bynotes", null),
-                        p.getOrElse("byindexes", null),
-                        p.getOrElse("asc", null)
-                    )
-                case "nlpcraft:limit" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("limit"),
-                        p.getOrElse("asc", null),
-                        p.get("indexes"),
-                        p.get("note")
-                    )
-                case "nlpcraft:coordinate" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("latitude"),
-                        p.get("longitude")
-                    )
-                case "nlpcraft:num" ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("from"),
-                        p.get("to"),
-                        p.getOrElse("indexes", null),
-                        p.getOrElse("note", null)
+    private def getParameters(note: NCNlpSentenceNote, withIndexes: Boolean = true): Any = {
+        val seq1 = if (withIndexes) Seq(note.wordIndexes, note.noteType) else Seq(note.noteType)
+
+        val seq2: Seq[Any] =
+            if (note.isUser)
+                Seq.empty
+            else {
+                note.noteType match {
+                    case "nlpcraft:continent" ⇒
+                        Seq(
+                            note.get("continent")
+                        )
+                    case "nlpcraft:subcontinent" ⇒
+                        Seq(
+                            note.get("continent"),
+                            note.get("subcontinent")
+                        )
+                    case "nlpcraft:country" ⇒
+                        Seq(
+                            note.get("continent"),
+                            note.get("subcontinent"),
+                            note.get("country")
+                        )
+                    case "nlpcraft:region" ⇒
+                        Seq(
+                            note.wordIndexes,
+                            note.noteType,
+                            note.get("continent"),
+                            note.get("subcontinent"),
+                            note.get("country"),
+                            note.get("region")
+                        )
+                    case "nlpcraft:city" ⇒
+                        Seq(
+                            note.get("continent"),
+                            note.get("subcontinent"),
+                            note.get("country"),
+                            note.get("region"),
+                            note.get("city")
+                        )
+                    case "nlpcraft:metro" ⇒
+                        Seq(
+                            note.get("metro")
+                        )
+                    case "nlpcraft:date" ⇒
+                        Seq(
+                            note.get("from"),
+                            note.get("to")
+                        )
+                    case "nlpcraft:relation" ⇒
+                        Seq(
+                            note.get("type"),
+                            note.get("indexes"),
+                            note.get("note")
+                        )
+                    case "nlpcraft:sort" ⇒
+                        Seq(
+                            note.wordIndexes,
+                            note.noteType,
+                            note.get("subjnotes"),
+                            note.get("subjindexes"),
+                            note.getOrElse("bynotes", null),
+                            note.getOrElse("byindexes", null),
+                            note.getOrElse("asc", null)
+                        )
+                    case "nlpcraft:limit" ⇒
+                        Seq(
+                            note.get("limit"),
+                            note.getOrElse("asc", null),
+                            note.get("indexes"),
+                            note.get("note")
+                        )
+                    case "nlpcraft:coordinate" ⇒
+                        Seq(
+                            note.get("latitude"),
+                            note.get("longitude")
+                        )
+                    case "nlpcraft:num" ⇒
+                        Seq(
+                            note.get("from"),
+                            note.get("to"),
+                            note.getOrElse("indexes", null),
+                            note.getOrElse("note", null)
 
-                    )
-                case x if x.startsWith("google:") ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("meta"),
-                        p.get("mentionsBeginOffsets"),
-                        p.get("mentionsContents"),
-                        p.get("mentionsTypes")
-                    )
-                case x if x.startsWith("stanford:") ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("nne")
-                    )
-                case x if x.startsWith("opennlp:") ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType
-                    )
-                case x if x.startsWith("spacy:") ⇒
-                    (
-                        p.wordIndexes,
-                        p.noteType,
-                        p.get("vector")
-                    )
+                        )
+                    case x if x.startsWith("google:") ⇒
+                        Seq(
+                            note.get("meta"),
+                            note.get("mentionsBeginOffsets"),
+                            note.get("mentionsContents"),
+                            note.get("mentionsTypes")
+                        )
+                    case x if x.startsWith("stanford:") ⇒
+                        Seq(
+                            note.get("nne")
+                        )
+                    case x if x.startsWith("opennlp:") ⇒
+                        Seq(
+                            note.wordIndexes,
+                            note.noteType
+                        )
+                    case x if x.startsWith("spacy:") ⇒
+                        Seq(
+                            note.get("vector")
+                        )
 
-                case _ ⇒ throw new AssertionError(s"Unexpected note type: ${p.noteType}")
-            }
+                    case _ ⇒ throw new AssertionError(s"Unexpected note type: ${note.noteType}")
+                }
         }
 
+        seq1 ++ seq2
+    }
+
     /**
       * Fixes tokens positions.
       *
@@ -622,10 +603,9 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             // Some words with same note type can be detected various ways.
             // We keep only one variant -  with `best` direct and sparsity parameters,
             // other variants for these words are redundant.
-
             val redundant: Seq[NCNlpSentenceNote] =
                 ns.flatten.filter(!_.isNlp).distinct.
-                    groupBy(getParameters).
+                    groupBy(p ⇒ getParameters(p)).
                     map(p ⇒ p._2.sortBy(p ⇒
                         (
                             // System notes don't have such flags.
@@ -689,7 +669,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                                     None
                             )
 
-                    // Removes sentences which have only one difference - 'direct' flag of their user tokens.
+                    // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                     // `Direct` sentences have higher priority.
                     case class Key(
                         sysNotes: Seq[Map[String, java.io.Serializable]],
@@ -735,12 +715,16 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                     tok.size match {
                         case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
                         case 2 ⇒ require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok")
-                        case _ ⇒ require(false, s"Unexpected token notes count: $tok")
+                        case _ ⇒ require(requirement = false, s"Unexpected token notes count: $tok")
                     }
                 )
             )
 
-            sens
+            // Drops similar sentences (with same tokens structure).
+            // Among similar sentences we prefer one with minimal free words count.
+            sens.groupBy(_.flatten.filter(!_.isNlp).map(note ⇒ getParameters(note, withIndexes = false))).
+            map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
+            toSeq
         }
 
     /**
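
Note on the hunk above: the new dedup step groups sentence variants by the parameters of their non-NLP notes computed without word indexes, and within each group keeps only the variant with the smallest total length of free words. A minimal sketch of that groupBy/minBy pattern on hypothetical simplified types (not the real NCNlpSentence/NCNlpSentenceNote API, whose key is built via getParameters(note, withIndexes = false)):

// Hypothetical, simplified model of the "drop similar sentences" step.
object DedupSketch {
    final case class Note(noteType: String, isNlp: Boolean, isStopWord: Boolean, wordIndexes: Seq[Int])
    final case class Sentence(id: String, notes: Seq[Note])

    // Key built only from non-NLP notes, ignoring word indexes.
    private def key(s: Sentence): Seq[String] = s.notes.filter(!_.isNlp).map(_.noteType)

    // Total length of free words: NLP notes that are not stopwords.
    private def freeWords(s: Sentence): Int =
        s.notes.filter(n => n.isNlp && !n.isStopWord).map(_.wordIndexes.length).sum

    def dedup(sens: Seq[Sentence]): Seq[Sentence] =
        sens.groupBy(key).map { case (_, similar) => similar.minBy(freeWords) }.toSeq

    def main(args: Array[String]): Unit = {
        val a = Sentence("a", Seq(
            Note("nlpcraft:limit", isNlp = false, isStopWord = false, wordIndexes = Seq(0, 1)),
            Note("nlpcraft:nlp",   isNlp = true,  isStopWord = false, wordIndexes = Seq(2))
        ))
        val b = Sentence("b", Seq(
            Note("nlpcraft:limit", isNlp = false, isStopWord = false, wordIndexes = Seq(0, 1)),
            Note("nlpcraft:nlp",   isNlp = true,  isStopWord = false, wordIndexes = Seq(2, 3))
        ))

        // Same key (one 'nlpcraft:limit' note), so only the variant with fewer free words survives.
        dedup(Seq(a, b)).foreach(s => println(s.id)) // prints "a"
    }
}
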
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 7fc2265..05861e1 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -121,11 +121,9 @@ object NCRelationEnricher extends NCProbeEnricher {
             "srvReqId" → ns.srvReqId,
             "modelId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
-            val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
-
             // Tries to grab tokens direct way.
             // Example: A, B, C ⇒ ABC, AB, BC .. (AB will be processed first)
-            for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
+            for (toks ← ns.tokenMixWithStopWords())
                 tryToMatch(toks) match {
                     case Some(m) ⇒
                         //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, Seq(m.matched.head))) {
@@ -141,8 +139,6 @@ object NCRelationEnricher extends NCProbeEnricher {
                             m.matched.filter(_ != m.matchedHead).foreach(_.addStopReason(note))
 
                             m.matchedHead.add(note)
-
-                            buf += toks.toSet
                         }
                     case None ⇒ // No-op.
                 }
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 282ad72..96e6b30 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -244,13 +244,13 @@ object NCSortEnricher extends NCProbeEnricher {
       * @param toks
       */
     private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
+        require(toks.nonEmpty)
+
         case class KeyWord(tokens: Seq[NCNlpSentenceToken], synonymIndex: Int) {
             require(tokens.nonEmpty)
         }
 
-        def extract(
-            keyStems: Seq[String], toks: Seq[NCNlpSentenceToken], used: Seq[NCNlpSentenceToken] = Seq.empty
-        ): Option[KeyWord] = {
+        def extract(keyStems: Seq[String], used: Seq[NCNlpSentenceToken]): Option[KeyWord] = {
             require(keyStems.nonEmpty)
 
             if (toks.nonEmpty) {
@@ -272,9 +272,9 @@ object NCSortEnricher extends NCProbeEnricher {
 
         // Order is important.
         // SORT and ORDER don't have same words (validated)
-        val orderOpt = extract(ORDER.map(_._1), toks)
-        val byOpt = extract(BY, toks, used = orderOpt.toSeq.flatMap(_.tokens))
-        val sortOpt = extract(SORT, toks, used = orderOpt.toSeq.flatMap(_.tokens) ++ byOpt.toSeq.flatMap(_.tokens))
+        val orderOpt = extract(ORDER.map(_._1), used = Seq.empty)
+        val byOpt = extract(BY, used = orderOpt.toSeq.flatMap(_.tokens))
+        val sortOpt = extract(SORT, used = orderOpt.toSeq.flatMap(_.tokens) ++ byOpt.toSeq.flatMap(_.tokens))
 
         if (sortOpt.nonEmpty || orderOpt.nonEmpty) {
             val sortToks = sortOpt.toSeq.flatMap(_.tokens)
@@ -412,7 +412,7 @@ object NCSortEnricher extends NCProbeEnricher {
             "txt" → ns.text) { _ ⇒
             val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
 
-            for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
+            for (toks ← ns.tokenMixWithStopWords())
                 tryToMatch(toks) match {
                     case Some(m) ⇒
                         def addNotes(
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index dd25663..187e0d8 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -17,8 +17,8 @@
 
 package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
 
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestNlpToken ⇒ nlp, NCTestSortToken ⇒ srt, NCTestUserToken ⇒ usr}
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCTestSortTokenType._
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestNlpToken => nlp, NCTestSortToken => srt, NCTestUserToken => usr}
 import org.junit.jupiter.api.Test
 
 /**