You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/30 15:28:48 UTC
[incubator-nlpcraft] branch NLPCRAFT-50-1 updated: Functions
enricher.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-50-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-50-1 by this push:
new 63ba338 Functions enricher.
63ba338 is described below
commit 63ba3386c0f5ac09d10935121b72e08435c057fa
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 30 17:14:44 2021 +0300
Functions enricher.
---
.../enrichers/function/NCFunctionEnricher.scala | 134 ++++++++++++++-------
.../function/NCEnricherFunctionSpec.scala | 32 ++++-
2 files changed, 124 insertions(+), 42 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala
index d68c2ac..5a16f62 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala
@@ -19,12 +19,14 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.NCService
+import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import java.util.Collections
+import scala.collection.mutable
import scala.jdk.CollectionConverters.{MapHasAsScala, SetHasAsScala}
/**
@@ -33,33 +35,48 @@ import scala.jdk.CollectionConverters.{MapHasAsScala, SetHasAsScala}
object NCFunctionEnricher extends NCProbeEnricher {
private final val TOK_ID = "nlpcraft:function"
- private case class SingeFunc(name: String, synonyms: Seq[String])
+ private case class SingleFuncDef(name: String, synonyms: String*)
- private object SingeFunc {
- def apply(name: String, syns:String*): SingeFunc = SingeFunc(name, syns)
- }
+ private final val FUNC_NUM_SINGLE = {
+ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function.NCFunctionEnricher.{SingleFuncDef => F}
- private final val FUNC_NUM_SINGLE =
Set(
- SingeFunc("sin", "sine"),
- SingeFunc("cos", "cosine"),
- SingeFunc("tan", "tangent"),
- SingeFunc("cot", "cotangent"),
- SingeFunc("round"),
- SingeFunc("floor"),
- SingeFunc("max", "maximum"),
- SingeFunc("min", "minimum"),
- SingeFunc("avg", "average"),
- SingeFunc("sum", "summary")
+ F("sin", "sine"),
+ F("cos", "cosine"),
+ F("tan", "tangent"),
+ F("cot", "cotangent"),
+
+ F("round"),
+ F("floor"),
+
+ F("max", "{maximum|max} {of|_}"),
+ F("min", "{minimum|min} {of|_}"),
+ F("avg", "{average|avg} {of|_}"),
+ F("sum", "{summary|sum} {of|_}"),
+ F("count", "count {of|_}"),
+ F("first", "first {of|_}"),
+ F("last", "last {of|_}")
)
+ }
+
- @volatile private var funcNumSingleData: Map[String, String] = _
+ @volatile private var funcSingle: Map[String, String] = _
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ =>
ackStarting()
- funcNumSingleData =
- FUNC_NUM_SINGLE.flatMap(p => (p.synonyms :+ p.name).toSet.map(NCNlpCoreManager.stem).map(_ -> p.name).toMap).toMap
+ val parser = new NCMacroParser
+
+ funcSingle =
+ FUNC_NUM_SINGLE.flatMap(
+ func =>
+ (func.synonyms :+ func.name).
+ toSet.flatMap(parser.expand).
+ map(_.split(" ").map(_.strip).filter(_.nonEmpty).map(NCNlpCoreManager.stem)).
+ map(stems => stems.mkString(" ")).
+ map { syn => syn -> func.name }.
+ toMap
+ ).toMap
ackStarted()
}
@@ -71,7 +88,7 @@ object NCFunctionEnricher extends NCProbeEnricher {
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ =>
ackStopping()
- funcNumSingleData = null
+ funcSingle = null
ackStopped()
}
@@ -81,34 +98,69 @@ object NCFunctionEnricher extends NCProbeEnricher {
val restricted =
mdl.model.getRestrictedCombinations.asScala.getOrElse(TOK_ID, java.util.Collections.emptySet()).
- asScala
+ asScala.toSet
startScopedSpan(
"enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
- ) { _ =>
- val buf = collection.mutable.ArrayBuffer.empty[Seq[NCNlpSentenceToken]]
-
- for (toks <- ns.tokenMixWithStopWords() if toks.size > 1 && !buf.exists(_.containsSlice(toks))) {
- funcNumSingleData.get(toks.head.stem) match {
- case Some(f) =>
- val users = toks.tail.filter(_.isUser)
-
- if (users.size == 1 && toks.tail.forall(t => users.contains(t) || t.isStopWord)) {
- for (typ <- users.head.filter(_.isUser).map(_.noteType) if !restricted.contains(typ))
- toks.head.add(
- NCNlpSentenceNote(
- Seq(toks.head.index),
- TOK_ID,
- "type" -> f,
- "indexes" -> Collections.singleton(users.head.index),
- "note" -> typ
- )
- )
- }
+ ) { _ => processSingleFunctions(ns, restricted)
+ }
+ }
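+ /**
+ * Adds `nlpcraft:function` notes to function synonym tokens followed (stop words aside) by a user token.
+ * @param ns Sentence whose tokens are scanned and annotated.
+ * @param restricted User note types that must not be referenced by a function note.
+ */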
+ private def processSingleFunctions(ns: NCNlpSentence, restricted: Set[String]): Unit = {
+ val buf = mutable.ArrayBuffer.empty[Seq[NCNlpSentenceToken]]
+
+ for (toks <- ns.tokenMixWithStopWords() if !buf.exists(_.exists(toks.contains))) {
+ val stops = toks.filter(_.isStopWord)
+
+ val toksAllCombs =
+ (0 to stops.size).
+ flatMap(i => stops.combinations(i).map(comb => toks.filter(t => !comb.contains(t)))).
+ filter(_.nonEmpty)
+
+ toksAllCombs.to(LazyList).
+ flatMap(comb =>
+ funcSingle.get(comb.map(_.stem).mkString(" ")) match {
+ case Some(funName) => Some(comb -> funName)
+ case None => None
+ }
+ ).headOption match {
+ case Some((comb, funName)) =>
+ buf += toks
+
+ val after = ns.tokens.drop(comb.last.index + 1)
+
+ after.find(_.isUser) match {
+ case Some(userTok) =>
+ val betweenFuncAndUser = after.takeWhile(_ != userTok)
+
+ if (betweenFuncAndUser.isEmpty || betweenFuncAndUser.forall(_.isStopWord)) {
+ val usrNoteTypes =
+ userTok.flatMap(n =>
+ if (n.isUser && !restricted.contains(n.noteType)) Some(n.noteType) else None
+ )
+
+ for (usrNoteType <- usrNoteTypes) {
+ val note =
+ NCNlpSentenceNote(
+ comb.map(_.index).toSeq,
+ TOK_ID,
+ "type" -> funName,
+ "indexes" -> Collections.singletonList(userTok.index),
+ "note" -> usrNoteType
+ )
+ comb.foreach(_.add(note))
+ }
+ }
+
+ case None => // No-op.
+ }
case None => // No-op.
}
- }
}
}
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala
index 6cc1929..0afd3c9 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala
@@ -42,6 +42,23 @@ class NCEnricherFunctionSpec extends NCEnricherBaseSpec {
)
),
_ => checkAll(
+ "max of A test",
+ Seq(
+ fun(text = "max of", `type` = "max", index = 1, note = "A"),
+ usr(text = "A", id = "A"),
+ nlp(text = "test")
+ )
+ ),
+ _ => checkAll(
+ "max the of the A test",
+ Seq(
+ fun(text = "max of", `type` = "max", index = 2, note = "A"),
+ nlp(text = "the the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "test")
+ )
+ ),
+ _ => checkAll(
"maximum the A, maximum the the A",
Seq(
fun(text = "maximum", `type` = "max", index = 2, note = "A"),
@@ -66,7 +83,20 @@ class NCEnricherFunctionSpec extends NCEnricherBaseSpec {
nlp(text = "the", isStop = true),
usr(text = "A", id = "A")
)
+ ),
+ _ => checkAll(
+ "maximum of of the A the A, maximum test A",
+ Seq(
+ fun(text = "maximum of", `type` = "max", index = 2, note = "A"),
+ nlp(text = "of the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ nlp(text = "maximum"),
+ nlp(text = "test"),
+ usr(text = "A", id = "A")
+ )
)
-
)
}