You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/05/11 08:37:21 UTC

[incubator-nlpcraft] 02/02: Limit enricher fixed.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit bb678a56d9c3858d74cda84c769be37e0e8f4fe8
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon May 11 11:37:07 2020 +0300

    Limit enricher fixed.
---
 .../nlpcraft/examples/sql/SqlModelTest.scala       |  2 +-
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 20 +++++++++++++++--
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 26 +++++++++++++++++-----
 .../nlpcraft/examples/sql/NCSqlModelSpec.scala     | 21 +++--------------
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index f6294f1..92531c0 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -642,7 +642,7 @@ class SqlModelTest {
                 Seq(
                     "What are the least performing categories for the last quarter?"
                 ),
-                """SELECT
+                 """SELECT
                   |  orders.freight,
                   |  orders.order_date,
                   |  categories.category_id,
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 70ff977..23be9fd 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -175,6 +175,22 @@ object NCLimitEnricher extends NCProbeEnricher {
     private def stemmatizeWords[T](m: Map[String, T]): Map[String, T] = m.map(p ⇒ NCNlpCoreManager.stem(p._1) → p._2)
 
     /**
+      *
+      * @param t
+      */
+    private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+        t.find(_.isUser) match {
+            case Some(n) ⇒ !n.contains("value")
+            case None ⇒ false
+        }
+
+    /**
+      *
+      * @param n
+      */
+    private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+    /**
       * Starts this component.
       */
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
@@ -195,7 +211,7 @@ object NCLimitEnricher extends NCProbeEnricher {
             val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
             val groupsMap = groupNums(ns, numsMap.values)
 
-            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || TECH_WORDS.contains(t.stem)
+            def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || TECH_WORDS.contains(t.stem)
 
             // Tries to grab tokens reverse way.
             // Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
@@ -271,7 +287,7 @@ object NCLimitEnricher extends NCProbeEnricher {
         val i1 = toks.head.index
         val i2 = toks.last.index
 
-        val refCands = toks.filter(_.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
+        val refCands = toks.filter(_.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
 
         // Reference should be last.
         if (refCands.nonEmpty && refCands.last.index == toks.last.index) {
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 9b21322..bd9df0e 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -205,8 +205,8 @@ object NCSortEnricher extends NCProbeEnricher {
                     between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
                 }
 
-                val minIdx = toks.dropWhile(!_.isUser).head.index
-                val maxIdx = toks.reverse.dropWhile(!_.isUser).head.index
+                val minIdx = toks.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+                val maxIdx = toks.reverse.dropWhile(t ⇒ !isUserNotValue(t)).head.index
 
                 require(minIdx <= maxIdx)
 
@@ -242,6 +242,22 @@ object NCSortEnricher extends NCProbeEnricher {
 
     /**
       *
+      * @param t
+      */
+    private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+        t.find(_.isUser) match {
+            case Some(n) ⇒ !n.contains("value")
+            case None ⇒ false
+        }
+
+    /**
+      *
+      * @param n
+      */
+    private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+    /**
+      *
       * @param toks
       */
     private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
@@ -287,7 +303,7 @@ object NCSortEnricher extends NCProbeEnricher {
                     "BY"
                 else if (orderToks.contains(t))
                     "ORDER"
-                else if (t.isUser)
+                else if (isUserNotValue(t))
                     "x"
                 else
                     "-"
@@ -299,7 +315,7 @@ object NCSortEnricher extends NCProbeEnricher {
                 val i2 = others.last.index
 
                 val othersRefs = others.filter(
-                    t ⇒ t.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
+                    t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
                 )
 
                 if (
@@ -413,7 +429,7 @@ object NCSortEnricher extends NCProbeEnricher {
             "modelId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
             val notes = mutable.HashSet.empty[NCNlpSentenceNote]
-            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || MASK_WORDS.contains(t.stem)
+            def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || MASK_WORDS.contains(t.stem)
 
             for (toks ← ns.tokenMixWithStopWords() if NCEnricherProcessor.validImportant(ns, toks, isImportant)) {
                 tryToMatch(toks) match {
diff --git a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index bc2b06f..e92400d 100644
--- a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -105,12 +105,11 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 lim(text = "10", limit=10, index =1, note="tbl:suppliers"),
                 usr(text = "suppliers", id = "tbl:suppliers")
             ),
-            // TODO: the suspicious winner.
             _ ⇒ checkExists(
                 txt = "last year Exotic Liquids orders",
                 dte(text="last year"),
                 usr(text = "Exotic Liquids", id = "condition:value"),
-                srt(text = "orders", typ = SUBJ_ONLY, note = "condition:value", index = 1)
+                usr(text = "orders", id = "tbl:orders")
             ),
             _ ⇒ checkExists(
                 txt = "give me the orders sorted by ship date",
@@ -141,16 +140,14 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 nlp(text = "sorted"),
                 nlp(text = "by")
             ),
-            // TODO: the suspicious winner.
             _ ⇒ checkExists(
                 txt = "What are the top orders for the last 2 weeks sorted by order quantity?",
                 lim(text = "What are the top", limit = 10, index = 1, note = "tbl:orders", asc = false),
                 usr(text = "orders", id = "tbl:orders"),
                 dte(text = "for last 2 weeks"),
                 nlp(text = "the", isStop = true),
-                srt(text = "sorted by", typ = BY_ONLY, note = "tbl:orders", index = 5),
-                usr(text = "order", id = "tbl:orders"),
-                nlp(text = "quantity"),
+                srt(text = "sorted by", typ = BY_ONLY, note = "col:num", index = 5),
+                usr(text = "order quantity", id = "col:num"),
                 nlp(text = "?", isStop = true)
             ),
             _ ⇒ checkExists(
@@ -163,18 +160,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 usr(text = "order quantity", id = "col:num"),
                 nlp(text = "?", isStop = true)
             ),
-            // TODO: add following sentences.
-//            What are the best performing products for the last quarter?
-//            What are the best performing categories for the last quarter?
-//            What are the best performing employee for the last quarter?
-//            What are the best performing territory for the last quarter?
-//            What are the best performing region for the last quarter?
-//
-//            What are the least performing products for the last quarter?
-//            What are the least performing categories for the last quarter?
-//            What are the least performing employee for the last quarter?
-//            What are the least performing territory for the last quarter?
-//            What are the least performing region for the last quarter?
             _ ⇒ checkExists(
                 txt = "What are the best performing products for the last quarter?",
                 nlp(text = "What are the", isStop = true),