You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/03 13:56:04 UTC

[incubator-nlpcraft] 01/01: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-431-430
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 3ce31d194ad1cf71fc84ed38c3c31d0130d3d8b2
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Sep 3 16:55:55 2021 +0300

    WIP.
---
 .../nlpcraft/common/nlp/numeric/NCNumeric.scala    |  13 ++-
 .../common/nlp/numeric/NCNumericManager.scala      |  24 ++++-
 .../nlp/enrichers/numeric/NCNumericEnricher.scala  | 111 ++++++++++++---------
 3 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
index 75a3365..1de9d4b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
@@ -28,14 +28,21 @@ case class NCNumericUnit(name: String, unitType: String)
 
 /**
   *
+  * @param unit
+  * @param tokens
+  */
+case class NCNumericUnitData(unit: NCNumericUnit, tokens: Seq[NCNlpSentenceToken])
+
+/**
+  *
   * @param tokens
   * @param value
   * @param isFractional
-  * @param unit
+  * @param unitData
   */
 case class NCNumeric(
     tokens: Seq[NCNlpSentenceToken],
     value: Double,
     isFractional: Boolean,
-    unit: Option[NCNumericUnit]
-)
+    unitData: Option[NCNumericUnitData]
+)
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
index a428ab9..1dab5ef 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
@@ -91,8 +91,17 @@ object NCNumericManager extends NCService {
         val after = s.drop(num.length)
 
         if (num.nonEmpty && after.nonEmpty) {
-            def mkNumeric(u: NCNumericUnit): Option[NCNumeric] =
-                Some(NCNumeric(Seq(t), java.lang.Double.valueOf(num), isFractional = isFractional(num), unit = Some(u)))
+            def mkNumeric(u: NCNumericUnit): Option[NCNumeric] = {
+                val toks = Seq(t)
+
+                Some(
+                    NCNumeric(
+                        toks,
+                        java.lang.Double.valueOf(num),
+                        isFractional = isFractional(num),
+                        unitData = Some(NCNumericUnitData(u, toks)))
+                )
+            }
 
             unitsOrigs.get(after) match {
                 case Some(u) => mkNumeric(u)
@@ -123,12 +132,14 @@ object NCNumericManager extends NCService {
                 senWords.indexOfSlice(dtWords) match {
                     case -1 => None
                     case idx =>
+                        val toks = senToks.slice(idx, idx + dtWords.length)
+
                         Some(
                             NCNumeric(
-                                tokens = senToks.slice(idx, idx + dtWords.length),
+                                tokens = toks,
                                 value = dtPeriod.value,
                                 isFractional = false,
-                                unit = Some(dtPeriod.unit)
+                                unitData = Some(NCNumericUnitData(dtPeriod.unit, toks))
                             )
                         )
                 }
@@ -404,7 +415,10 @@ object NCNumericManager extends NCService {
                             None
                     }).headOption match {
                         case Some((unit, unitToks)) =>
-                            val numWithUnit = NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit))
+                            val numWithUnit =
+                                NCNumeric(
+                                    seq ++ unitToks, v, isFractional = isFractional, Some(NCNumericUnitData(unit, unitToks))
+                                )
 
                             // If unit name is same as user element name,
                             // it returns both variants: numeric with unit and without.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index b28f198..b89ff99 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -198,7 +198,7 @@ object NCNumericEnricher extends NCServerEnricher {
     private def toString(seq: Seq[NCNlpSentenceToken], sep: String = " ", stem: Boolean = false) =
         seq.map(t => if (stem) t.stem else t.normText).mkString(sep)
 
-    private def mkNote(
+    private def mkNotes(
         toks: Seq[NCNlpSentenceToken],
         from: Double,
         fromIncl: Boolean,
@@ -206,9 +206,10 @@ object NCNumericEnricher extends NCServerEnricher {
         to: Double,
         toIncl: Boolean,
         toFractional: Boolean,
-        unitOpt: Option[NCNumericUnit]
-    ): NCNlpSentenceNote = {
-        val params = mutable.ArrayBuffer.empty[(String, Any)] ++
+        unitDataOpt: Option[NCNumericUnitData],
+    ): Seq[NCNlpSentenceNote] = {
+        val params =
+            mutable.ArrayBuffer.empty[(String, Any)] ++
             Seq(
                 "from" -> from,
                 "fromIncl" -> fromIncl,
@@ -222,14 +223,28 @@ object NCNumericEnricher extends NCServerEnricher {
                 "isToPositiveInfinity" -> (to == MAX_VALUE)
             )
 
-        unitOpt match {
-            case Some(unit) =>
-                params += "unit" -> unit.name
-                params += "unitType" -> unit.unitType
-            case None => // No-op.
+        unitDataOpt match {
+            case Some(unitData) =>
+                def extend(): Seq[(String, Any)] = {
+                    params += "unit" -> unitData.unit.name
+                    params += "unitType" -> unitData.unit.unitType
+
+                    params
+                }
+
+                if (unitData.tokens == toks)
+                    Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*))
+                else {
+                    val n1 = NCNlpSentenceNote(
+                        toks.filter(t => !unitData.tokens.contains(t)).map(_.index), "nlpcraft:num", params.clone():_*
+                    )
+                    val n2 = NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*)
+
+                    Seq(n1, n2)
+                }
+
+            case None => Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*))
         }
-    
-        NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*)
     }
 
     /**
@@ -274,25 +289,28 @@ object NCNumericEnricher extends NCServerEnricher {
     
                     val prepToks = Seq(getBefore(ts1)) ++ ts1 ++ Seq(getBefore(ts2)) ++ ts2
     
-                    val badRange = num1.unit.isDefined && num2.unit.isDefined && num1.unit != num2.unit
+                    val badRange =
+                        num1.unitData.isDefined &&
+                        num2.unitData.isDefined &&
+                        num1.unitData.get.unit != num2.unitData.get.unit
     
                     if (!badRange) {
                         val unit =
-                            if (num1.unit.isDefined && num2.unit.isEmpty)
-                                num1.unit
-                            else if (num1.unit.isEmpty && num2.unit.isDefined)
-                                num2.unit
-                            else if (num1.unit.isEmpty && num2.unit.isEmpty)
+                            if (num1.unitData.isDefined && num2.unitData.isEmpty)
+                                num1.unitData
+                            else if (num1.unitData.isEmpty && num2.unitData.isDefined)
+                                num2.unitData
+                            else if (num1.unitData.isEmpty && num2.unitData.isEmpty)
                                 None
-                            else{
-                                require(num1.unit == num2.unit)
-    
-                                num1.unit
+                            else {
+                                require(num1.unitData.get.unit == num2.unitData.get.unit)
+
+                                Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens))
                             }
     
-                        val note = p._2 match {
+                        val notes = p._2 match {
                             case BETWEEN_EXCLUSIVE =>
-                                mkNote(
+                                mkNotes(
                                     prepToks,
                                     d1,
                                     fromIncl = false,
@@ -303,7 +321,7 @@ object NCNumericEnricher extends NCServerEnricher {
                                     unit
                                 )
                             case BETWEEN_INCLUSIVE =>
-                                mkNote(
+                                mkNotes(
                                     prepToks,
                                     d1,
                                     fromIncl = true,
@@ -315,8 +333,9 @@ object NCNumericEnricher extends NCServerEnricher {
                                 )
                             case _ => throw new AssertionError(s"Illegal note type: ${p._2}.")
                         }
-    
-                        prepToks.foreach(_.add(note))
+
+                        for (note <- notes)
+                            prepToks.foreach(_.add(note))
     
                         processed ++= ts1
                         processed ++= ts2
@@ -340,10 +359,10 @@ object NCNumericEnricher extends NCServerEnricher {
     
                             processed ++= toks
     
-                            val note =
+                            val notes =
                                 prep.prepositionType match {
                                     case MORE =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = false,
@@ -351,10 +370,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = MAX_VALUE,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case MORE_OR_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = true,
@@ -362,10 +381,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = MAX_VALUE,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case LESS =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             MIN_VALUE,
                                             fromIncl = true,
@@ -373,10 +392,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = false,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case LESS_OR_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             MIN_VALUE,
                                             fromIncl = true,
@@ -384,10 +403,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = true,
@@ -395,10 +414,10 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = true,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case NOT_EQUAL =>
-                                        mkNote(
+                                        mkNotes(
                                             toks,
                                             num.value,
                                             fromIncl = false,
@@ -406,12 +425,13 @@ object NCNumericEnricher extends NCServerEnricher {
                                             to = num.value,
                                             toIncl = false,
                                             toFractional = num.isFractional,
-                                            num.unit
+                                            num.unitData
                                         )
                                     case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.")
                                 }
-    
-                            toks.foreach(_.add(note))
+
+                            for (note <- notes)
+                                toks.foreach(_.add(note))
                         }
                 }
     
@@ -423,7 +443,7 @@ object NCNumericEnricher extends NCServerEnricher {
     
             // Numeric without conditions.
             for (num <- nums if !processed.exists(num.tokens.contains)) {
-                val note = mkNote(
+                val notes = mkNotes(
                     num.tokens,
                     num.value,
                     fromIncl = true,
@@ -431,12 +451,13 @@ object NCNumericEnricher extends NCServerEnricher {
                     num.value,
                     toIncl = true,
                     num.isFractional,
-                    num.unit
+                    num.unitData
                 )
     
                 processed ++= num.tokens
-    
-                num.tokens.foreach(_.add(note))
+
+                for (note <- notes)
+                    num.tokens.foreach(_.add(note))
             }
         }
     }