You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/03 13:56:04 UTC
[incubator-nlpcraft] 01/01: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-431-430
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 3ce31d194ad1cf71fc84ed38c3c31d0130d3d8b2
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Sep 3 16:55:55 2021 +0300
WIP.
---
.../nlpcraft/common/nlp/numeric/NCNumeric.scala | 13 ++-
.../common/nlp/numeric/NCNumericManager.scala | 24 ++++-
.../nlp/enrichers/numeric/NCNumericEnricher.scala | 111 ++++++++++++---------
3 files changed, 95 insertions(+), 53 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
index 75a3365..1de9d4b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala
@@ -28,14 +28,21 @@ case class NCNumericUnit(name: String, unitType: String)
/**
*
+ * @param unit Numeric unit (its name and unit type).
+ * @param tokens Sentence tokens associated with the unit.
+ */
+case class NCNumericUnitData(unit: NCNumericUnit, tokens: Seq[NCNlpSentenceToken])
+
+/**
+ *
* @param tokens
* @param value
* @param isFractional
- * @param unit
+ * @param unitData Optional unit data (the unit and its tokens); empty when the numeric has no unit.
*/
case class NCNumeric(
tokens: Seq[NCNlpSentenceToken],
value: Double,
isFractional: Boolean,
- unit: Option[NCNumericUnit]
-)
+ unitData: Option[NCNumericUnitData]
+)
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
index a428ab9..1dab5ef 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
@@ -91,8 +91,17 @@ object NCNumericManager extends NCService {
val after = s.drop(num.length)
if (num.nonEmpty && after.nonEmpty) {
- def mkNumeric(u: NCNumericUnit): Option[NCNumeric] =
- Some(NCNumeric(Seq(t), java.lang.Double.valueOf(num), isFractional = isFractional(num), unit = Some(u)))
+ def mkNumeric(u: NCNumericUnit): Option[NCNumeric] = {
+ val toks = Seq(t)
+
+ Some(
+ NCNumeric(
+ toks,
+ java.lang.Double.valueOf(num),
+ isFractional = isFractional(num),
+ unitData = Some(NCNumericUnitData(u, toks)))
+ )
+ }
unitsOrigs.get(after) match {
case Some(u) => mkNumeric(u)
@@ -123,12 +132,14 @@ object NCNumericManager extends NCService {
senWords.indexOfSlice(dtWords) match {
case -1 => None
case idx =>
+ val toks = senToks.slice(idx, idx + dtWords.length)
+
Some(
NCNumeric(
- tokens = senToks.slice(idx, idx + dtWords.length),
+ tokens = toks,
value = dtPeriod.value,
isFractional = false,
- unit = Some(dtPeriod.unit)
+ unitData = Some(NCNumericUnitData(dtPeriod.unit, toks))
)
)
}
@@ -404,7 +415,10 @@ object NCNumericManager extends NCService {
None
}).headOption match {
case Some((unit, unitToks)) =>
- val numWithUnit = NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit))
+ val numWithUnit =
+ NCNumeric(
+ seq ++ unitToks, v, isFractional = isFractional, Some(NCNumericUnitData(unit, unitToks))
+ )
// If unit name is same as user element name,
// it returns both variants: numeric with unit and without.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index b28f198..b89ff99 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -198,7 +198,7 @@ object NCNumericEnricher extends NCServerEnricher {
private def toString(seq: Seq[NCNlpSentenceToken], sep: String = " ", stem: Boolean = false) =
seq.map(t => if (stem) t.stem else t.normText).mkString(sep)
- private def mkNote(
+ private def mkNotes(
toks: Seq[NCNlpSentenceToken],
from: Double,
fromIncl: Boolean,
@@ -206,9 +206,10 @@ object NCNumericEnricher extends NCServerEnricher {
to: Double,
toIncl: Boolean,
toFractional: Boolean,
- unitOpt: Option[NCNumericUnit]
- ): NCNlpSentenceNote = {
- val params = mutable.ArrayBuffer.empty[(String, Any)] ++
+ unitDataOpt: Option[NCNumericUnitData],
+ ): Seq[NCNlpSentenceNote] = {
+ val params =
+ mutable.ArrayBuffer.empty[(String, Any)] ++
Seq(
"from" -> from,
"fromIncl" -> fromIncl,
@@ -222,14 +223,28 @@ object NCNumericEnricher extends NCServerEnricher {
"isToPositiveInfinity" -> (to == MAX_VALUE)
)
- unitOpt match {
- case Some(unit) =>
- params += "unit" -> unit.name
- params += "unitType" -> unit.unitType
- case None => // No-op.
+ unitDataOpt match {
+ case Some(unitData) =>
+ def extend(): Seq[(String, Any)] = {
+ params += "unit" -> unitData.unit.name
+ params += "unitType" -> unitData.unit.unitType
+
+ params
+ }
+
+ if (unitData.tokens == toks)
+ Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*))
+ else {
+ val n1 = NCNlpSentenceNote(
+ toks.filter(t => !unitData.tokens.contains(t)).map(_.index), "nlpcraft:num", params.clone():_*
+ )
+ val n2 = NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*)
+
+ Seq(n1, n2)
+ }
+
+ case None => Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*))
}
-
- NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*)
}
/**
@@ -274,25 +289,28 @@ object NCNumericEnricher extends NCServerEnricher {
val prepToks = Seq(getBefore(ts1)) ++ ts1 ++ Seq(getBefore(ts2)) ++ ts2
- val badRange = num1.unit.isDefined && num2.unit.isDefined && num1.unit != num2.unit
+ val badRange =
+ num1.unitData.isDefined &&
+ num2.unitData.isDefined &&
+ num1.unitData.get.unit != num2.unitData.get.unit
if (!badRange) {
val unit =
- if (num1.unit.isDefined && num2.unit.isEmpty)
- num1.unit
- else if (num1.unit.isEmpty && num2.unit.isDefined)
- num2.unit
- else if (num1.unit.isEmpty && num2.unit.isEmpty)
+ if (num1.unitData.isDefined && num2.unitData.isEmpty)
+ num1.unitData
+ else if (num1.unitData.isEmpty && num2.unitData.isDefined)
+ num2.unitData
+ else if (num1.unitData.isEmpty && num2.unitData.isEmpty)
None
- else{
- require(num1.unit == num2.unit)
-
- num1.unit
+ else {
+ require(num1.unitData.get.unit == num2.unitData.get.unit)
+
+ Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens))
}
- val note = p._2 match {
+ val notes = p._2 match {
case BETWEEN_EXCLUSIVE =>
- mkNote(
+ mkNotes(
prepToks,
d1,
fromIncl = false,
@@ -303,7 +321,7 @@ object NCNumericEnricher extends NCServerEnricher {
unit
)
case BETWEEN_INCLUSIVE =>
- mkNote(
+ mkNotes(
prepToks,
d1,
fromIncl = true,
@@ -315,8 +333,9 @@ object NCNumericEnricher extends NCServerEnricher {
)
case _ => throw new AssertionError(s"Illegal note type: ${p._2}.")
}
-
- prepToks.foreach(_.add(note))
+
+ for (note <- notes)
+ prepToks.foreach(_.add(note))
processed ++= ts1
processed ++= ts2
@@ -340,10 +359,10 @@ object NCNumericEnricher extends NCServerEnricher {
processed ++= toks
- val note =
+ val notes =
prep.prepositionType match {
case MORE =>
- mkNote(
+ mkNotes(
toks,
num.value,
fromIncl = false,
@@ -351,10 +370,10 @@ object NCNumericEnricher extends NCServerEnricher {
to = MAX_VALUE,
toIncl = true,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case MORE_OR_EQUAL =>
- mkNote(
+ mkNotes(
toks,
num.value,
fromIncl = true,
@@ -362,10 +381,10 @@ object NCNumericEnricher extends NCServerEnricher {
to = MAX_VALUE,
toIncl = true,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case LESS =>
- mkNote(
+ mkNotes(
toks,
MIN_VALUE,
fromIncl = true,
@@ -373,10 +392,10 @@ object NCNumericEnricher extends NCServerEnricher {
to = num.value,
toIncl = false,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case LESS_OR_EQUAL =>
- mkNote(
+ mkNotes(
toks,
MIN_VALUE,
fromIncl = true,
@@ -384,10 +403,10 @@ object NCNumericEnricher extends NCServerEnricher {
to = num.value,
toIncl = true,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case EQUAL =>
- mkNote(
+ mkNotes(
toks,
num.value,
fromIncl = true,
@@ -395,10 +414,10 @@ object NCNumericEnricher extends NCServerEnricher {
to = num.value,
toIncl = true,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case NOT_EQUAL =>
- mkNote(
+ mkNotes(
toks,
num.value,
fromIncl = false,
@@ -406,12 +425,13 @@ object NCNumericEnricher extends NCServerEnricher {
to = num.value,
toIncl = false,
toFractional = num.isFractional,
- num.unit
+ num.unitData
)
case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.")
}
-
- toks.foreach(_.add(note))
+
+ for (note <- notes)
+ toks.foreach(_.add(note))
}
}
@@ -423,7 +443,7 @@ object NCNumericEnricher extends NCServerEnricher {
// Numeric without conditions.
for (num <- nums if !processed.exists(num.tokens.contains)) {
- val note = mkNote(
+ val notes = mkNotes(
num.tokens,
num.value,
fromIncl = true,
@@ -431,12 +451,13 @@ object NCNumericEnricher extends NCServerEnricher {
num.value,
toIncl = true,
num.isFractional,
- num.unit
+ num.unitData
)
processed ++= num.tokens
-
- num.tokens.foreach(_.add(note))
+
+ for (note <- notes)
+ num.tokens.foreach(_.add(note))
}
}
}