You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/14 07:01:38 UTC
[incubator-nlpcraft] 04/08: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 315b1fae49e691ded1efea2a98a1c9b1d8e28aab
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Apr 13 13:24:07 2021 +0300
WIP.
---
.../apache/nlpcraft/probe/mgrs/NCProbeModel.scala | 1 +
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 258 +++++++++++----------
2 files changed, 134 insertions(+), 125 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 1618421..1c21cb9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -50,4 +50,5 @@ case class NCProbeModel(
) {
def hasDslSynonyms(elemId: String): Boolean = dslSynonyms.contains(elemId)
def hasDslSynonyms: Boolean = dslSynonyms.nonEmpty
+ def hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index adf1358..f2aa542 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -475,155 +475,163 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
}).seq
}
- @throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
- require(isStarted)
-
- val mdlId = mdl.model.getId
- val srvReqId = ns.srvReqId
-
- startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒
- val req = NCRequestImpl(senMeta, srvReqId)
- lazy val h = mkComplexes(mdl, ns)
-
- startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
- var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
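+ /**
+ * Tries to match every model element against each token combination and marks the matches in the sentence.
+ * @param mdl Probe model providing the elements and their synonyms.
+ * @param ns Sentence in which matched elements are marked.
+ * @param combosToks Token combinations to test against each element.
+ * @param state Processing state selecting which synonym kinds (simple and/or DSL) are tried.
+ * @param req Request passed to DSL synonym matching.
+ */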
+ private def execute(
+ mdl: NCProbeModel,
+ ns: NCNlpSentence,
+ combosToks: Seq[Seq[NlpToken]],
+ state: State, req: NCRequest,
+ h: ⇒ ComplexHolder, span: Span
+ ): Unit =
+ startScopedSpan("execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
+ if (DEEP_DEBUG)
+ println(s"Execution started [state=$state]")
- ns.firstProbePhase = false
+ val contCache =
+ mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++
+ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
+ lazy val dslCache = mutable.HashSet.empty[Seq[Complex]]
- val combosToks = combos(ns)
+ var found = false
- def go(): Unit = {
- val contCache = mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
- lazy val dslCache = mutable.HashSet.empty[Seq[Complex]]
+ def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
+ found = true
+ val resIdxs = res.map(_.index)
- var found = false
+ val continuous = U.isContinuous(resIdxs.sorted)
- def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
- found = true
- val resIdxs = res.map(_.index)
+ if (continuous && resIdxs == allToksIdxs)
+ contCache(elm.getId) += allToksIdxs
- val continuous = U.isContinuous(resIdxs.sorted)
+ val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId)
- if (continuous && resIdxs == allToksIdxs)
- contCache(elm.getId) += allToksIdxs
+ if (added) {
+ val direct = s.isDirect && U.isIncreased(resIdxs)
- val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId)
+ mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous)
+ }
- if (added) {
- val direct = s.isDirect && U.isIncreased(resIdxs)
+ if (DEEP_DEBUG)
+ println(
+ s"${if (added) "Added" else "Skipped"} element [" +
+ s"id=${elm.getId}, " +
+ s"type=$typ, " +
+ s"text='${res.map(_.origText).mkString(" ")}', " +
+ s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
+ s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " +
+ s"continuous=$continuous, " +
+ s"synonym=$s" +
+ s"]"
+ )
+ }
- mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous)
+ for (toks ← combosToks) {
+ val tokIdxs = toks.map(_.index)
+ lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet)
+ lazy val tokStems = toks.map(_.stem).mkString(" ")
+
+ // Attempt to match each element.
+ for (
+ elm ← mdl.elements.values;
+ elemId = elm.getId
+ if
+ !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
+ // Checks whole tokens slice.
+ !alreadyMarked(toks, tokIdxs, continuous = true, elemId)
+ ) {
+ // 1. SIMPLE.
+ found = false
+
+ val simpleEnabled: Boolean =
+ state match {
+ case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId)
+ case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId)
+ case _ ⇒ false
}
- if (DEEP_DEBUG)
- println(
- s"${if (added) "Added" else "Skipped"} element [" +
- s"id=${elm.getId}, " +
- s"type=$typ, " +
- s"text='${res.map(_.origText).mkString(" ")}', " +
- s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
- s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " +
- s"continuous=$continuous, " +
- s"synonym=$s" +
- s"]"
- )
- }
+ // 1.1 Continuous.
+ if (simpleEnabled && !found)
+ fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
+ case Some(h) ⇒
+ def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
+ syns.get(tokStems) match {
+ case Some(s) ⇒ add("simple continuous", elm, toks, tokIdxs, s)
+ case None ⇒ notFound()
+ }
- for (toks ← combosToks) {
- val tokIdxs = toks.map(_.index)
- lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet)
- lazy val tokStems = toks.map(_.stem).mkString(" ")
-
- // Attempt to match each element.
- for (
- elm ← mdl.elements.values;
- elemId = elm.getId
- if
- !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
- // Checks whole tokens slice.
- !alreadyMarked(toks, tokIdxs, continuous = true, elemId)
- ) {
- // 1. SIMPLE.
- found = false
-
- val simpleEnabled: Boolean =
- state match {
- case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId)
- case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId)
- case _ ⇒ false
- }
+ def tryScan(syns: Seq[Synonym]): Unit =
+ for (s ← syns if !found)
+ if (s.isMatch(toks))
+ add("simple continuous scan", elm, toks, tokIdxs, s)
- // 1.1 Direct.
- if (simpleEnabled && !found)
- fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
- case Some(h) ⇒
- def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
- syns.get(tokStems) match {
- case Some(s) ⇒ add("direct simple", elm, toks, tokIdxs, s)
- case None ⇒ notFound()
- }
-
- def tryScan(syns: Seq[Synonym]): Unit =
- for (s ← syns if !found)
- if (s.isMatch(toks))
- add("scan simple", elm, toks, tokIdxs, s)
-
- tryMap(
- h.txtDirectSynonyms,
- () ⇒ {
- tryScan(h.notTxtDirectSynonyms)
-
- if (!found)
- tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
- }
- )
- case None ⇒ // No-op.
- }
+ tryMap(
+ h.txtDirectSynonyms,
+ () ⇒ {
+ tryScan(h.notTxtDirectSynonyms)
- // 1.2 Sparse.
- if (simpleEnabled && !found)
- for (s ← get(mdl.sparseSynonyms, elemId) if !found)
- s.trySparseMatch(toks) match {
- case Some(res) ⇒ add("sparse simple", elm, res, tokIdxs, s)
- case None ⇒ // No-op.
+ if (!found)
+ tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
}
+ )
+ case None ⇒ // No-op.
+ }
- // 2. DSL.
- if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) {
- found = false
-
- // 2.1 Sparse.
- if (mdl.hasDslSynonyms)
- for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
- s.trySparseMatch(comb.map(_.data), req) match {
- case Some(res) ⇒
- add("sparse DSL", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
- dslCache += comb
- case None ⇒ // No-op.
- }
- // 2.2 Direct.
- else
- for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
- if (s.isMatch(comb.map(_.data), req)) {
- add("direct DSL", elm, toks, tokIdxs, s, toPartsComplex(comb, s))
- dslCache += comb
- }
+ // 1.2 Sparse.
+ if (simpleEnabled && !found)
+ for (s ← get(mdl.sparseSynonyms, elemId) if !found)
+ s.trySparseMatch(toks) match {
+ case Some(res) ⇒ add("simple sparse", elm, res, tokIdxs, s)
+ case None ⇒ // No-op.
}
- }
+
+ // 2. DSL.
+ if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) {
+ found = false
+
+ // 2.1 Sparse.
+ if (mdl.hasSparseSynonyms)
+ for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
+ s.trySparseMatch(comb.map(_.data), req) match {
+ case Some(res) ⇒
+ add("DSL sparse", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+ dslCache += comb
+ case None ⇒ // No-op.
+ }
+ // 2.2 Continuous.
+ else
+ for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
+ if (s.isMatch(comb.map(_.data), req)) {
+ add("DSL continuous", elm, toks, tokIdxs, s, toPartsComplex(comb, s))
+ dslCache += comb
+ }
}
}
+ }
+ }
- if (DEEP_DEBUG)
- println(s"Execution started with state: $state.")
+ @throws[NCE]
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
- go()
+ startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒
+ val req = NCRequestImpl(senMeta, ns.srvReqId)
+ val combosToks = combos(ns)
- if (state == SIMPLE) {
- state = DSL_FIRST
+ lazy val h = mkComplexes(mdl, ns)
- go()
- }
+ execute(mdl, ns, combosToks, if (ns.firstProbePhase) SIMPLE else DSL_NEXT, req, h, parent)
+
+ if (ns.firstProbePhase) {
+ ns.firstProbePhase = false
+
+ execute(mdl, ns, combosToks, DSL_FIRST, req, h, parent)
}
processParsers(mdl, ns, span, req)