You are viewing a plain text version of this content. The canonical link to the original was not preserved in this plain-text copy.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/05/11 08:37:21 UTC
[incubator-nlpcraft] 02/02: Limit enricher fixed.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit bb678a56d9c3858d74cda84c769be37e0e8f4fe8
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon May 11 11:37:07 2020 +0300
Limit enricher fixed.
---
.../nlpcraft/examples/sql/SqlModelTest.scala | 2 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 20 +++++++++++++++--
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 26 +++++++++++++++++-----
.../nlpcraft/examples/sql/NCSqlModelSpec.scala | 21 +++--------------
4 files changed, 43 insertions(+), 26 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index f6294f1..92531c0 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -642,7 +642,7 @@ class SqlModelTest {
Seq(
"What are the least performing categories for the last quarter?"
),
- """SELECT
+ """SELECT
| orders.freight,
| orders.order_date,
| categories.category_id,
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 70ff977..23be9fd 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -175,6 +175,22 @@ object NCLimitEnricher extends NCProbeEnricher {
private def stemmatizeWords[T](m: Map[String, T]): Map[String, T] = m.map(p ⇒ NCNlpCoreManager.stem(p._1) → p._2)
/**
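+ * Tells whether the token's first user note does not contain "value"; `false` if the token has no user note.
+ * @param t Token to check.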
+ */
+ private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+ t.find(_.isUser) match {
+ case Some(n) ⇒ !n.contains("value")
+ case None ⇒ false
+ }
+
+ /**
+ * Tells whether the given note is a user note that does not contain "value".
+ * @param n Note to check.
+ */
+ private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+ /**
* Starts this component.
*/
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
@@ -195,7 +211,7 @@ object NCLimitEnricher extends NCProbeEnricher {
val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
val groupsMap = groupNums(ns, numsMap.values)
- def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || TECH_WORDS.contains(t.stem)
+ def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || TECH_WORDS.contains(t.stem)
// Tries to grab tokens reverse way.
// Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
@@ -271,7 +287,7 @@ object NCLimitEnricher extends NCProbeEnricher {
val i1 = toks.head.index
val i2 = toks.last.index
- val refCands = toks.filter(_.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
+ val refCands = toks.filter(_.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
// Reference should be last.
if (refCands.nonEmpty && refCands.last.index == toks.last.index) {
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 9b21322..bd9df0e 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -205,8 +205,8 @@ object NCSortEnricher extends NCProbeEnricher {
between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
}
- val minIdx = toks.dropWhile(!_.isUser).head.index
- val maxIdx = toks.reverse.dropWhile(!_.isUser).head.index
+ val minIdx = toks.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+ val maxIdx = toks.reverse.dropWhile(t ⇒ !isUserNotValue(t)).head.index
require(minIdx <= maxIdx)
@@ -242,6 +242,22 @@ object NCSortEnricher extends NCProbeEnricher {
/**
*
+ * @param t Token to check; the result is `true` only if its first user note does not contain "value".
+ */
+ private def isUserNotValue(t: NCNlpSentenceToken): Boolean =
+ t.find(_.isUser) match {
+ case Some(n) ⇒ !n.contains("value")
+ case None ⇒ false
+ }
+
+ /**
+ * Tells whether the given note is a user note that does not contain "value".
+ * @param n Note to check.
+ */
+ private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
+
+ /**
+ *
* @param toks
*/
private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
@@ -287,7 +303,7 @@ object NCSortEnricher extends NCProbeEnricher {
"BY"
else if (orderToks.contains(t))
"ORDER"
- else if (t.isUser)
+ else if (isUserNotValue(t))
"x"
else
"-"
@@ -299,7 +315,7 @@ object NCSortEnricher extends NCProbeEnricher {
val i2 = others.last.index
val othersRefs = others.filter(
- t ⇒ t.exists(n ⇒ n.isUser && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
+ t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
)
if (
@@ -413,7 +429,7 @@ object NCSortEnricher extends NCProbeEnricher {
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
val notes = mutable.HashSet.empty[NCNlpSentenceNote]
- def isImportant(t: NCNlpSentenceToken): Boolean = t.isUser || MASK_WORDS.contains(t.stem)
+ def isImportant(t: NCNlpSentenceToken): Boolean = isUserNotValue(t) || MASK_WORDS.contains(t.stem)
for (toks ← ns.tokenMixWithStopWords() if NCEnricherProcessor.validImportant(ns, toks, isImportant)) {
tryToMatch(toks) match {
diff --git a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index bc2b06f..e92400d 100644
--- a/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -105,12 +105,11 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
lim(text = "10", limit=10, index =1, note="tbl:suppliers"),
usr(text = "suppliers", id = "tbl:suppliers")
),
- // TODO: the suspicious winner.
_ ⇒ checkExists(
txt = "last year Exotic Liquids orders",
dte(text="last year"),
usr(text = "Exotic Liquids", id = "condition:value"),
- srt(text = "orders", typ = SUBJ_ONLY, note = "condition:value", index = 1)
+ usr(text = "orders", id = "tbl:orders")
),
_ ⇒ checkExists(
txt = "give me the orders sorted by ship date",
@@ -141,16 +140,14 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
nlp(text = "sorted"),
nlp(text = "by")
),
- // TODO: the suspicious winner.
_ ⇒ checkExists(
txt = "What are the top orders for the last 2 weeks sorted by order quantity?",
lim(text = "What are the top", limit = 10, index = 1, note = "tbl:orders", asc = false),
usr(text = "orders", id = "tbl:orders"),
dte(text = "for last 2 weeks"),
nlp(text = "the", isStop = true),
- srt(text = "sorted by", typ = BY_ONLY, note = "tbl:orders", index = 5),
- usr(text = "order", id = "tbl:orders"),
- nlp(text = "quantity"),
+ srt(text = "sorted by", typ = BY_ONLY, note = "col:num", index = 5),
+ usr(text = "order quantity", id = "col:num"),
nlp(text = "?", isStop = true)
),
_ ⇒ checkExists(
@@ -163,18 +160,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
usr(text = "order quantity", id = "col:num"),
nlp(text = "?", isStop = true)
),
- // TODO: add following sentences.
-// What are the best performing products for the last quarter?
-// What are the best performing categories for the last quarter?
-// What are the best performing employee for the last quarter?
-// What are the best performing territory for the last quarter?
-// What are the best performing region for the last quarter?
-//
-// What are the least performing products for the last quarter?
-// What are the least performing categories for the last quarter?
-// What are the least performing employee for the last quarter?
-// What are the least performing territory for the last quarter?
-// What are the least performing region for the last quarter?
_ ⇒ checkExists(
txt = "What are the best performing products for the last quarter?",
nlp(text = "What are the", isStop = true),