You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/01/19 15:13:13 UTC

[incubator-nlpcraft] branch NLPCRAFT-20 created (now 2647315)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-20
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 2647315  WIP.

This branch includes the following new commits:

     new 2647315  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-20
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 2647315964b0a93b189692a0e2f0c2ff22bc5a68
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Jan 19 17:38:19 2021 +0300

    WIP.
---
 .../common/nlp/numeric/NCNumericManager.scala      | 24 ++++++++++++++--------
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala |  2 +-
 .../nlp/enrichers/limit/NCEnricherLimitSpec.scala  | 10 +++++++++
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
index 4fa8ef5..d359100 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
@@ -361,7 +361,7 @@ object NCNumericManager extends NCService {
                 }).toSeq.map(_._2)
         
             val nums = grps.flatMap(seq ⇒ {
-                def mkNum(v: Double, isFractional: Boolean): NCNumeric = {
+                def mkNums(v: Double, isFractional: Boolean): Seq[NCNumeric] = {
                     // Units synonyms are not stemmed.
                     Range.inclusive(1, maxSynWords).reverse.toStream.flatMap(i ⇒ {
                         val afterNum = ns.slice(seq.last.index + 1, seq.last.index + i + 1)
@@ -386,8 +386,16 @@ object NCNumericManager extends NCService {
                         else
                             None
                     }).headOption match {
-                        case Some((unit, unitToks)) ⇒ NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit))
-                        case None ⇒ NCNumeric(seq, v, isFractional = isFractional, None)
+                        case Some((unit, unitToks)) ⇒
+                            val numWithUnit = NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit))
+
+                            // If the unit name is the same as a user element name,
+                            // return both variants: the numeric with the unit and without it.
+                            unitToks.flatten.count(p ⇒ !p.isNlp && p.noteType != "nlpcraft:num") match {
+                                case 1 ⇒ Seq(numWithUnit, NCNumeric(seq, v, isFractional = isFractional, None))
+                                case _ ⇒ Seq(numWithUnit)
+                            }
+                        case None ⇒ Seq(NCNumeric(seq, v, isFractional = isFractional, None))
                     }
                 }
         
@@ -395,17 +403,17 @@ object NCNumericManager extends NCService {
                     case 1 ⇒
                         val txt = seq.head.normText
                         genNums.get(txt) match {
-                            case Some(intVal) ⇒ Some(mkNum(intVal.toDouble, isFractional = false))
+                            case Some(intVal) ⇒ mkNums(intVal.toDouble, isFractional = false)
         
                             case None ⇒
                                 toNumeric(txt) match {
-                                    case Some(dblVal) ⇒ Some(mkNum(dblVal, isFractional = isFractional(txt)))
+                                    case Some(dblVal) ⇒ mkNums(dblVal, isFractional = isFractional(txt))
                                     case None ⇒ None
                                 }
                         }
                     case _ ⇒
                         genNums.get(toString(seq)) match {
-                            case Some(intVal) ⇒ Some(mkNum(intVal.toDouble, isFractional = false))
+                            case Some(intVal) ⇒ mkNums(intVal.toDouble, isFractional = false)
         
                             // Try to parse space separated numerics 1 000 000.
                             case None ⇒
@@ -414,7 +422,7 @@ object NCNumericManager extends NCService {
                                     val txt = toString(seq, "")
          
                                     toNumeric(txt) match {
-                                        case Some(dblVal) ⇒ Some(mkNum(dblVal, isFractional = isFractional(txt)))
+                                        case Some(dblVal) ⇒ mkNums(dblVal, isFractional = isFractional(txt))
                                         case None ⇒ None
                                     }
                                 }
@@ -426,7 +434,7 @@ object NCNumericManager extends NCService {
          
             val usedToks = nums.flatMap(_.tokens)
          
-            (nums ++ ns.filter(t ⇒ !usedToks.contains(t)).flatMap(mkSolidNumUnit)).sortBy(_.tokens.head.index)
+            (nums ++ ns.filter(t ⇒ !usedToks.contains(t)).flatMap(mkSolidNumUnit)).sortBy(_.tokens.head.index).distinct
         }
     }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 9e57605..fcfafd9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -255,7 +255,7 @@ object NCLimitEnricher extends NCProbeEnricher {
             "mdlId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
             val notes = mutable.HashSet.empty[NCNlpSentenceNote]
-            val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
+            val numsMap = NCNumericManager.find(ns).map(p ⇒ p.tokens → p).toMap
             val groupsMap = groupNums(ns, numsMap.values)
 
             // Tries to grab tokens reverse way.
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
index 00f21e6..e983030 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
@@ -34,6 +34,16 @@ class NCEnricherLimitSpec extends NCEnricherBaseSpec {
     def test(): Unit =
         runBatch(
             _ ⇒ checkExists(
+                "top 23 A",
+                lim(text = "top 23", limit = 23, index = 1, note = "A", asc = false),
+                usr(text = "A", id = "A")
+            ),
+            _ ⇒ checkExists(
+                "top 10 A",
+                lim(text = "top 10", limit = 10, index = 1, note = "A", asc = false),
+                usr(text = "A", id = "A")
+            ),
+            _ ⇒ checkExists(
                 "top A",
                 lim(text = "top", limit = 10, index = 1, note = "A", asc = false),
                 usr(text = "A", id = "A")