You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/03 13:56:03 UTC

[incubator-nlpcraft] branch NLPCRAFT-431-430 created (now 3ce31d1)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-431-430
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 3ce31d1  WIP.

This branch includes the following new commits:

     new 3ce31d1  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Any revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-431-430
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 3ce31d194ad1cf71fc84ed38c3c31d0130d3d8b2
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Sep 3 16:55:55 2021 +0300

    WIP.
---
 .../nlpcraft/common/nlp/numeric/NCNumeric.scala    |  13 ++-
 .../common/nlp/numeric/NCNumericManager.scala      |  24 ++++-
 .../nlp/enrichers/numeric/NCNumericEnricher.scala  | 111 ++++++++++++---------
 3 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
index 75a3365..1de9d4b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
@@ -28,14 +28,21 @@ case class NCNumericUnit(name: String, unitType: String)
 
 /**
   *
+  * @param unit
+  * @param tokens
+  */
+case class NCNumericUnitData(unit: NCNumericUnit, tokens: Seq[NCNlpSentenceToken])
+
+/**
+  *
   * @param tokens
   * @param value
   * @param isFractional
-  * @param unit
+  * @param unitData
   */
 case class NCNumeric(
     tokens: Seq[NCNlpSentenceToken],
     value: Double,
     isFractional: Boolean,
-    unit: Option[NCNumericUnit]
-)
+    unitData: Option[NCNumericUnitData]
+)
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
index a428ab9..1dab5ef 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
@@ -91,8 +91,17 @@ object NCNumericManager extends NCService {
         val after = s.drop(num.length)
 
         if (num.nonEmpty && after.nonEmpty) {
-            def mkNumeric(u: NCNumericUnit): Option[NCNumeric] =
-                Some(NCNumeric(Seq(t), java.lang.Double.valueOf(num), isFractional = isFractional(num), unit = Some(u)))
+            def mkNumeric(u: NCNumericUnit): Option[NCNumeric] = {
+                val toks = Seq(t)
+
+                Some(
+                    NCNumeric(
+                        toks,
+                        java.lang.Double.valueOf(num),
+                        isFractional = isFractional(num),
+                        unitData = Some(NCNumericUnitData(u, toks)))
+                )
+            }
 
             unitsOrigs.get(after) match {
                 case Some(u) => mkNumeric(u)
@@ -123,12 +132,14 @@ object NCNumericManager extends NCService {
                 senWords.indexOfSlice(dtWords) match {
                     case -1 => None
                     case idx =>
+                        val toks = senToks.slice(idx, idx + dtWords.length)
+
                         Some(
                             NCNumeric(
-                                tokens = senToks.slice(idx, idx + dtWords.length),
+                                tokens = toks,
                                 value = dtPeriod.value,
                                 isFractional = false,
-                                unit = Some(dtPeriod.unit)
+                                unitData = Some(NCNumericUnitData(dtPeriod.unit, toks))
                             )
                         )
                 }
@@ -404,7 +415,10 @@ object NCNumericManager extends NCService {
                             None
                     }).headOption match {
                         case Some((unit, unitToks)) =>
-                            val numWithUnit = NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit))
+                            val numWithUnit =
+                                NCNumeric(
+                                    seq ++ unitToks, v, isFractional = isFractional, Some(NCNumericUnitData(unit, unitToks))
+                                )
 
                             // If unit name is same as user element name,
                             // it returns both variants: numeric with unit and without.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index b28f198..b89ff99 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -198,7 +198,7 @@ object NCNumericEnricher extends NCServerEnricher {
     private def toString(seq: Seq[NCNlpSentenceToken], sep: String = " ", stem: Boolean = false) =
         seq.map(t => if (stem) t.stem else t.normText).mkString(sep)
 
-    private def mkNote(
+    private def mkNotes(
         toks: Seq[NCNlpSentenceToken],
         from: Double,
         fromIncl: Boolean,
@@ -206,9 +206,10 @@ object NCNumericEnricher extends NCServerEnricher {
         to: Double,
         toIncl: Boolean,
         toFractional: Boolean,
-        unitOpt: Option[NCNumericUnit]
-    ): NCNlpSentenceNote = {
-        val params = mutable.ArrayBuffer.empty[(String, Any)] ++
+        unitDataOpt: Option[NCNumericUnitData],
+    ): Seq[NCNlpSentenceNote] = {
+        val params =
+            mutable.ArrayBuffer.empty[(String, Any)] ++
             Seq(
                 "from" -> from,
                 "fromIncl" -> fromIncl,
@@ -222,14 +223,28 @@ object NCNumericEnricher extends NCServerEnricher {
                 "isToPositiveInfinity" -> (to == MAX_VALUE)
             )
 
-        unitOpt match {
-            case Some(unit) =>
-                params += "unit" -> unit.name
-                params += "unitType" -> unit.unitType
-            case None => // No-op.
+        unitDataOpt match {
+            case Some(unitData) =>
+                def extend(): Seq[(String, Any)] = {
+                    params += "unit" -> unitData.unit.name
+                    params += "unitType" -> unitData.unit.unitType
+
+                    params
+                }
+
+                if (unitData.tokens == toks)
+                    Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*))
+                else {
+                    val n1 = NCNlpSentenceNote(
+                        toks.filter(t => !unitData.tokens.contains(t)).map(_.index), "nlpcraft:num", params.clone():_*
+                    )
+                    val n2 = NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*)
+
+                    Seq(n1, n2)
+                }
+
+            case None => Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*))
         }
-    
-        NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*)
     }
 
     /**
@@ -274,25 +289,28 @@ object NCNumericEnricher extends NCServerEnricher {
     
                     val prepToks = Seq(getBefore(ts1)) ++ ts1 ++ Seq(getBefore(ts2)) ++ ts2
     
-                    val badRange = num1.unit.isDefined && num2.unit.isDefined && num1.unit != num2.unit
+                    val badRange =
+                        num1.unitData.isDefined &&
+                        num2.unitData.isDefined &&
+                        num1.unitData.get.unit != num2.unitData.get.unit
     
                     if (!badRange) {
                         val unit =
-                            if (num1.unit.isDefined && num2.unit.isEmpty)
-                                num1.unit
-                            else if (num1.unit.isEmpty && num2.unit.isDefined)
-                                num2.unit
-                            else if (num1.unit.isEmpty && num2.unit.isEmpty)
+                            if (num1.unitData.isDefined && num2.unitData.isEmpty)
+                                num1.unitData
+                            else if (num1.unitData.isEmpty && num2.unitData.isDefined)
+                                num2.unitData
+                            else if (num1.unitData.isEmpty && num2.unitData.isEmpty)
                                 None
-                            else{
-                                require(num1.unit == num2.unit)
-    
-                                num1.unit
+                            else {
+                                require(num1.unitData.get.unit == num2.unitData.get.unit)
+
+                                Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens))
                             }
     
-                        val note = p._2 match {
+                        val notes = p._2 match {
                             case BETWEEN_EXCLUSIVE =>
-                                mkNote(
+                                mkNotes(
                                     prepToks,
                                     d1,
                                     fromIncl = false,
@@ -303,7 +321,7 @@ object NCNumericEnricher extends NCServerEnricher {
                                     unit
                                 )
                             case BETWEEN_INCLUSIVE =>
-                                mkNote(
+                                mkNotes(
                                     prepToks,
                                     d1,
                                     fromIncl = true,
@@ -315,8 +333,9 @@ object NCNumericEnricher extends NCServerEnricher {
                                 )
                             case _ => throw new AssertionError(s"Illegal note type: ${p._2}.")
                         }
-    
-                        prepToks.foreach(_.add(note))
+
+                        for (note <- notes)
+                            prepToks.foreach(_.add(note))
     
                         processed ++= ts1
                         processed ++= ts2
@@ -340,10 +359,10 @@ object NCNumericEnricher extends NCServerEnricher {
     
                             processed ++= toks
     
-                            val note =
+                            val notes =
                                 prep.prepositionType match {
                                     case MORE =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = false,
@@ -351,10 +370,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = MAX_VALUE,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case MORE_OR_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = true,
@@ -362,10 +381,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = MAX_VALUE,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case LESS =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             MIN_VALUE,
                                             fromIncl = true,
@@ -373,10 +392,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = false,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case LESS_OR_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             MIN_VALUE,
                                             fromIncl = true,
@@ -384,10 +403,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = true,
@@ -395,10 +414,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case NOT_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = false,
@@ -406,12 +425,13 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = false,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.")
                                 }
-    
-                            toks.foreach(_.add(note))
+
+                            for (note <- notes)
+                                toks.foreach(_.add(note))
                         }
                 }
     
@@ -423,7 +443,7 @@ object NCNumericEnricher extends NCServerEnricher {
     
             // Numeric without conditions.
             for (num <- nums if !processed.exists(num.tokens.contains)) {
-                val note = mkNote(
+                val notes = mkNotes(
                     num.tokens,
                     num.value,
                     fromIncl = true,
@@ -431,12 +451,13 @@ object NCNumericEnricher extends NCServerEnricher {
                     num.value,
                     toIncl = true,
                     num.isFractional,
-                    num.unit
+                    num.unitData
                 )
     
                 processed ++= num.tokens
-    
-                num.tokens.foreach(_.add(note))
+
+                for (note <- notes)
+                    num.tokens.foreach(_.add(note))
             }
         }
     }