You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/05/11 08:37:19 UTC

[incubator-nlpcraft] branch NLPCRAFT-30 updated (bbe7876 -> bb678a5)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from bbe7876  Some comments added.
     new adf171a  Limit enricher fixed.
     new bb678a5  Limit enricher fixed.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlpcraft/examples/sql/SqlModelTest.scala       |  2 +-
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 20 +++++++++++++--
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 26 +++++++++++++++----
 .../probe/mgrs/nlp/impl/NCEnricherProcessor.scala  |  6 +++--
 .../nlpcraft/examples/sql/NCSqlModelSpec.scala     | 29 ++++------------------
 5 files changed, 49 insertions(+), 34 deletions(-)

[incubator-nlpcraft] 02/02: Limit enricher fixed.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit bb678a56d9c3858d74cda84c769be37e0e8f4fe8
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon May 11 11:37:07 2020 +0300

    Limit enricher fixed.
---
 .../nlpcraft/examples/sql/SqlModelTest.scala       |  2 +-
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 20 +++++++++++++++--
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 26 +++++++++++++++++-----
 .../nlpcraft/examples/sql/NCSqlModelSpec.scala     | 21 +++--------------
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index f6294f1..92531c0 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -642,7 +642,7 @@ class SqlModelTest {
                 Seq(
                     "What are the least performing categories for the last quarter?"
                 ),
-                """SELECT
+                 """SELECT
                   |  orders.freight,
                   |  orders.order_date,
                   |  categories.category_id,
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 70ff977..23be9fd 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -175,6 +175,22 @@ object NCLimitEnricher extends NCProbeEnricher {
     private def stemmatizeWords[T](m: Map[String, T]): Map[String, T] = m.map(p ⇒ NCNlpCoreManager.stem(p._1) → p._2)
 
     /**
+      *
+      * @param t
+      */
+    private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+        t.find(_.isUser) match {
+            case Some(n) ⇒ !n.contains("value")
+            case None ⇒ false
+        }
+
+    /**
+      *
+      * @param n
+      */
+    private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+    /**
       * Starts this component.
       */
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
@@ -195,7 +211,7 @@ object NCLimitEnricher extends NCProbeEnricher {
             val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
             val groupsMap = groupNums(ns, numsMap.values)
 
-            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || TECH_WORDS.contains(t.stem)
+            def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || TECH_WORDS.contains(t.stem)
 
             // Tries to grab tokens reverse way.
             // Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
@@ -271,7 +287,7 @@ object NCLimitEnricher extends NCProbeEnricher {
         val i1 = toks.head.index
         val i2 = toks.last.index
 
-        val refCands = toks.filter(_.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
+        val refCands = toks.filter(_.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
 
         // Reference should be last.
         if (refCands.nonEmpty && refCands.last.index == toks.last.index) {
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 9b21322..bd9df0e 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -205,8 +205,8 @@ object NCSortEnricher extends NCProbeEnricher {
                     between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
                 }
 
-                val minIdx = toks.dropWhile(!_.isUser).head.index
-                val maxIdx = toks.reverse.dropWhile(!_.isUser).head.index
+                val minIdx = toks.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+                val maxIdx = toks.reverse.dropWhile(t ⇒ !isUserNotValue(t)).head.index
 
                 require(minIdx <= maxIdx)
 
@@ -242,6 +242,22 @@ object NCSortEnricher extends NCProbeEnricher {
 
     /**
       *
+      * @param t
+      */
+    private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+        t.find(_.isUser) match {
+            case Some(n) ⇒ !n.contains("value")
+            case None ⇒ false
+        }
+
+    /**
+      *
+      * @param n
+      */
+    private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+    /**
+      *
       * @param toks
       */
     private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
@@ -287,7 +303,7 @@ object NCSortEnricher extends NCProbeEnricher {
                     "BY"
                 else if (orderToks.contains(t))
                     "ORDER"
-                else if (t.isUser)
+                else if (isUserNotValue(t))
                     "x"
                 else
                     "-"
@@ -299,7 +315,7 @@ object NCSortEnricher extends NCProbeEnricher {
                 val i2 = others.last.index
 
                 val othersRefs = others.filter(
-                    t ⇒ t.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
+                    t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
                 )
 
                 if (
@@ -413,7 +429,7 @@ object NCSortEnricher extends NCProbeEnricher {
             "modelId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
             val notes = mutable.HashSet.empty[NCNlpSentenceNote]
-            def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || MASK_WORDS.contains(t.stem)
+            def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || MASK_WORDS.contains(t.stem)
 
             for (toks ← ns.tokenMixWithStopWords() if NCEnricherProcessor.validImportant(ns, toks, isImportant)) {
                 tryToMatch(toks) match {
diff --git a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index bc2b06f..e92400d 100644
--- a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -105,12 +105,11 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 lim(text = "10", limit=10, index =1, note="tbl:suppliers"),
                 usr(text = "suppliers", id = "tbl:suppliers")
             ),
-            // TODO: the suspicious winner.
             _ ⇒ checkExists(
                 txt = "last year Exotic Liquids orders",
                 dte(text="last year"),
                 usr(text = "Exotic Liquids", id = "condition:value"),
-                srt(text = "orders", typ = SUBJ_ONLY, note = "condition:value", index = 1)
+                usr(text = "orders", id = "tbl:orders")
             ),
             _ ⇒ checkExists(
                 txt = "give me the orders sorted by ship date",
@@ -141,16 +140,14 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 nlp(text = "sorted"),
                 nlp(text = "by")
             ),
-            // TODO: the suspicious winner.
             _ ⇒ checkExists(
                 txt = "What are the top orders for the last 2 weeks sorted by order quantity?",
                 lim(text = "What are the top", limit = 10, index = 1, note = "tbl:orders", asc = false),
                 usr(text = "orders", id = "tbl:orders"),
                 dte(text = "for last 2 weeks"),
                 nlp(text = "the", isStop = true),
-                srt(text = "sorted by", typ = BY_ONLY, note = "tbl:orders", index = 5),
-                usr(text = "order", id = "tbl:orders"),
-                nlp(text = "quantity"),
+                srt(text = "sorted by", typ = BY_ONLY, note = "col:num", index = 5),
+                usr(text = "order quantity", id = "col:num"),
                 nlp(text = "?", isStop = true)
             ),
             _ ⇒ checkExists(
@@ -163,18 +160,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 usr(text = "order quantity", id = "col:num"),
                 nlp(text = "?", isStop = true)
             ),
-            // TODO: add following sentences.
-//            What are the best performing products for the last quarter?
-//            What are the best performing categories for the last quarter?
-//            What are the best performing employee for the last quarter?
-//            What are the best performing territory for the last quarter?
-//            What are the best performing region for the last quarter?
-//
-//            What are the least performing products for the last quarter?
-//            What are the least performing categories for the last quarter?
-//            What are the least performing employee for the last quarter?
-//            What are the least performing territory for the last quarter?
-//            What are the least performing region for the last quarter?
             _ ⇒ checkExists(
                 txt = "What are the best performing products for the last quarter?",
                 nlp(text = "What are the", isStop = true),

[incubator-nlpcraft] 01/02: Limit enricher fixed.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit adf171ae6b5054a676545d08bf266b51fd513525
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon May 11 10:39:50 2020 +0300

    Limit enricher fixed.
---
 .../apache/nlpcraft/probe/mgrs/nlp/impl/NCEnricherProcessor.scala | 6 ++++--
 .../scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala   | 8 ++------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCEnricherProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCEnricherProcessor.scala
index da9054d..2a9f12a 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCEnricherProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCEnricherProcessor.scala
@@ -69,7 +69,7 @@ object NCEnricherProcessor extends NCService with LazyLogging {
                     case "nlpcraft:date" ⇒ Seq("from", "to")
                     case "nlpcraft:relation" ⇒ Seq("type", "note") ++ addRefs("indexes")
                     case "nlpcraft:sort" ⇒ Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes")
-                    case "nlpcraft:limit" ⇒ Seq("limit", "asc", "note") ++ addRefs("indexes")
+                    case "nlpcraft:limit" ⇒ Seq("limit", "note") ++ addRefs("indexes", "asc") // The asc flag makes sense only together with references for limit.
                     case "nlpcraft:coordinate" ⇒ Seq("latitude", "longitude")
                     case "nlpcraft:num" ⇒ Seq("from", "to", "unit", "unitType")
                     case x if x.startsWith("google:") ⇒ Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes")
@@ -772,7 +772,9 @@ object NCEnricherProcessor extends NCService with LazyLogging {
                 case "nlpcraft:sort" ⇒
                     tokensEqualOrSimilar(getListList(n1, "subjindexes"), getListList(n2, "subjindexes")) &&
                     tokensEqualOrSimilar(getListList(n1, "byindexes"), getListList(n2, "byindexes"))
-                case "nlpcraft:limit" | "nlpcraft:reference" ⇒
+                case "nlpcraft:limit"  ⇒
+                    tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
+                case "nlpcraft:reference"  ⇒
                     tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
 
                 case _ ⇒ true
diff --git a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index e2f8e12..bc2b06f 100644
--- a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -153,16 +153,13 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 nlp(text = "quantity"),
                 nlp(text = "?", isStop = true)
             ),
-            // TODO: limit word top should be inside nlpcraft:limit
             _ ⇒ checkExists(
                 txt = "What are the top 25 orders for the last 2 weeks sorted by order quantity?",
-                nlp(text = "What are the", isStop = true),
-                nlp(text = "top"),
-                lim(text = "25", limit = 25, index = 3, note = "tbl:orders"),
+                lim(text = "What are the top 25", limit = 25, index = 1, note = "tbl:orders", asc = false),
                 usr(text = "orders", id = "tbl:orders"),
                 dte(text = "for last 2 weeks"),
                 nlp(text = "the", isStop = true),
-                srt(text = "sorted by", typ = BY_ONLY, note = "col:num", index = 7),
+                srt(text = "sorted by", typ = BY_ONLY, note = "col:num", index = 5),
                 usr(text = "order quantity", id = "col:num"),
                 nlp(text = "?", isStop = true)
             ),
@@ -194,7 +191,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 dte(text = "for last quarter"),
                 nlp(text = "the ?", isStop = true)
             )
-
         )
     }
 }