You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/14 07:01:38 UTC

[incubator-nlpcraft] 04/08: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 315b1fae49e691ded1efea2a98a1c9b1d8e28aab
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Apr 13 13:24:07 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   1 +
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 258 +++++++++++----------
 2 files changed, 134 insertions(+), 125 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 1618421..1c21cb9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -50,4 +50,5 @@ case class NCProbeModel(
 ) {
     def hasDslSynonyms(elemId: String): Boolean = dslSynonyms.contains(elemId)
     def hasDslSynonyms: Boolean = dslSynonyms.nonEmpty
+    def hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index adf1358..f2aa542 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -475,155 +475,163 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             }).seq
     }
 
-    @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
-        require(isStarted)
-
-        val mdlId = mdl.model.getId
-        val srvReqId = ns.srvReqId
-
-        startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒
-            val req = NCRequestImpl(senMeta, srvReqId)
-            lazy val h = mkComplexes(mdl, ns)
-
-            startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
-                var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
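+    /**
+      * Runs one synonym matching pass over the given token combinations for the given state.
+      * @param mdl Probe model providing the elements and their synonyms.
+      * @param ns Sentence whose tokens are matched and marked.
+      * @param combosToks Token combinations to try against each model element.
+      * @param state Pass state (`SIMPLE`, `DSL_FIRST` or `DSL_NEXT`) selecting which synonym kinds are tried.
+      * @param req Request passed to DSL synonym matching.
+      */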
+    private def execute(
+        mdl: NCProbeModel,
+        ns: NCNlpSentence,
+        combosToks: Seq[Seq[NlpToken]],
+        state: State, req: NCRequest,
+        h: ⇒ ComplexHolder, span: Span
+    ): Unit =
+        startScopedSpan("execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
+            if (DEEP_DEBUG)
+                println(s"Execution started [state=$state]")
 
-                ns.firstProbePhase = false
+            val contCache =
+                mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++
+                    mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
+            lazy val dslCache = mutable.HashSet.empty[Seq[Complex]]
 
-                val combosToks = combos(ns)
+            var found = false
 
-                def go(): Unit = {
-                    val contCache = mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
-                    lazy val dslCache = mutable.HashSet.empty[Seq[Complex]]
+            def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
+                found = true
+                val resIdxs = res.map(_.index)
 
-                    var found = false
+                val continuous = U.isContinuous(resIdxs.sorted)
 
-                    def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
-                        found = true
-                        val resIdxs = res.map(_.index)
+                if (continuous && resIdxs == allToksIdxs)
+                    contCache(elm.getId) += allToksIdxs
 
-                        val continuous = U.isContinuous(resIdxs.sorted)
+                val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId)
 
-                        if (continuous && resIdxs == allToksIdxs)
-                            contCache(elm.getId) += allToksIdxs
+                if (added) {
+                    val direct = s.isDirect && U.isIncreased(resIdxs)
 
-                        val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId)
+                    mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous)
+                }
 
-                        if (added) {
-                            val direct = s.isDirect && U.isIncreased(resIdxs)
+                if (DEEP_DEBUG)
+                    println(
+                        s"${if (added) "Added" else "Skipped"} element [" +
+                            s"id=${elm.getId}, " +
+                            s"type=$typ, " +
+                            s"text='${res.map(_.origText).mkString(" ")}', " +
+                            s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
+                            s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " +
+                            s"continuous=$continuous, " +
+                            s"synonym=$s" +
+                            s"]"
+                    )
+            }
 
-                            mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous)
+            for (toks ← combosToks) {
+                val tokIdxs = toks.map(_.index)
+                lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet)
+                lazy val tokStems = toks.map(_.stem).mkString(" ")
+
+                // Attempt to match each element.
+                for (
+                    elm ← mdl.elements.values;
+                        elemId = elm.getId
+                        if
+                        !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
+                            // Checks whole tokens slice.
+                            !alreadyMarked(toks, tokIdxs, continuous = true, elemId)
+                ) {
+                    // 1. SIMPLE.
+                    found = false
+
+                    val simpleEnabled: Boolean =
+                        state match {
+                            case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId)
+                            case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId)
+                            case _ ⇒ false
                         }
 
-                        if (DEEP_DEBUG)
-                            println(
-                                s"${if (added) "Added" else "Skipped"} element [" +
-                                    s"id=${elm.getId}, " +
-                                    s"type=$typ, " +
-                                    s"text='${res.map(_.origText).mkString(" ")}', " +
-                                    s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
-                                    s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " +
-                                    s"continuous=$continuous, " +
-                                    s"synonym=$s" +
-                                    s"]"
-                            )
-                    }
+                    // 1.1 Continuous.
+                    if (simpleEnabled && !found)
+                        fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
+                            case Some(h) ⇒
+                                def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
+                                    syns.get(tokStems) match {
+                                        case Some(s) ⇒ add("simple continuous", elm, toks, tokIdxs, s)
+                                        case None ⇒ notFound()
+                                    }
 
-                    for (toks ← combosToks) {
-                        val tokIdxs = toks.map(_.index)
-                        lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet)
-                        lazy val tokStems = toks.map(_.stem).mkString(" ")
-
-                        // Attempt to match each element.
-                        for (
-                            elm ← mdl.elements.values;
-                            elemId = elm.getId
-                            if
-                                !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
-                                 // Checks whole tokens slice.
-                                !alreadyMarked(toks, tokIdxs, continuous = true, elemId)
-                        ) {
-                            // 1. SIMPLE.
-                            found = false
-
-                            val simpleEnabled: Boolean =
-                                state match {
-                                    case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId)
-                                    case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId)
-                                    case _ ⇒ false
-                                }
+                                def tryScan(syns: Seq[Synonym]): Unit =
+                                    for (s ← syns if !found)
+                                        if (s.isMatch(toks))
+                                            add("simple continuous scan", elm, toks, tokIdxs, s)
 
-                            // 1.1 Direct.
-                            if (simpleEnabled && !found)
-                                fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
-                                    case Some(h) ⇒
-                                        def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
-                                            syns.get(tokStems) match {
-                                                case Some(s) ⇒ add("direct simple", elm, toks, tokIdxs, s)
-                                                case None ⇒ notFound()
-                                            }
-
-                                        def tryScan(syns: Seq[Synonym]): Unit =
-                                            for (s ← syns if !found)
-                                                if (s.isMatch(toks))
-                                                    add("scan simple", elm, toks, tokIdxs, s)
-
-                                        tryMap(
-                                            h.txtDirectSynonyms,
-                                            () ⇒ {
-                                                tryScan(h.notTxtDirectSynonyms)
-
-                                                if (!found)
-                                                    tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
-                                            }
-                                        )
-                                    case None ⇒ // No-op.
-                                }
+                                tryMap(
+                                    h.txtDirectSynonyms,
+                                    () ⇒ {
+                                        tryScan(h.notTxtDirectSynonyms)
 
-                            // 1.2 Sparse.
-                            if (simpleEnabled && !found)
-                                for (s ← get(mdl.sparseSynonyms, elemId) if !found)
-                                    s.trySparseMatch(toks) match {
-                                        case Some(res) ⇒ add("sparse simple", elm, res, tokIdxs, s)
-                                        case None ⇒ // No-op.
+                                        if (!found)
+                                            tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
                                     }
+                                )
+                            case None ⇒ // No-op.
+                        }
 
-                            // 2. DSL.
-                            if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) {
-                                found = false
-
-                                // 2.1 Sparse.
-                                if (mdl.hasDslSynonyms)
-                                    for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
-                                        s.trySparseMatch(comb.map(_.data), req) match {
-                                            case Some(res) ⇒
-                                                add("sparse DSL", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
-                                                dslCache += comb
-                                            case None ⇒ // No-op.
-                                        }
-                                // 2.2 Direct.
-                                else
-                                    for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
-                                        if (s.isMatch(comb.map(_.data), req)) {
-                                            add("direct DSL", elm, toks, tokIdxs, s, toPartsComplex(comb, s))
-                                            dslCache += comb
-                                        }
+                    // 1.2 Sparse.
+                    if (simpleEnabled && !found)
+                        for (s ← get(mdl.sparseSynonyms, elemId) if !found)
+                            s.trySparseMatch(toks) match {
+                                case Some(res) ⇒ add("simple sparse", elm, res, tokIdxs, s)
+                                case None ⇒ // No-op.
                             }
-                        }
+
+                    // 2. DSL.
+                    if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) {
+                        found = false
+
+                        // 2.1 Sparse.
+                        if (mdl.hasSparseSynonyms)
+                            for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
+                                s.trySparseMatch(comb.map(_.data), req) match {
+                                    case Some(res) ⇒
+                                        add("DSL sparse", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                        dslCache += comb
+                                    case None ⇒ // No-op.
+                                }
+                        // 2.2 Continuous.
+                        else
+                            for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found)
+                                if (s.isMatch(comb.map(_.data), req)) {
+                                    add("DSL continuous", elm, toks, tokIdxs, s, toPartsComplex(comb, s))
+                                    dslCache += comb
+                                }
                     }
                 }
+            }
+        }
 
-                if (DEEP_DEBUG)
-                    println(s"Execution started with state: $state.")
+    @throws[NCE]
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
 
-                go()
+        startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒
+            val req = NCRequestImpl(senMeta, ns.srvReqId)
+            val combosToks = combos(ns)
 
-                if (state == SIMPLE) {
-                    state = DSL_FIRST
+            lazy val h = mkComplexes(mdl, ns)
 
-                    go()
-                }
+            execute(mdl, ns, combosToks, if (ns.firstProbePhase) SIMPLE else DSL_NEXT, req, h, parent)
+
+            if (ns.firstProbePhase) {
+                ns.firstProbePhase = false
+
+                execute(mdl, ns, combosToks, DSL_FIRST, req, h, parent)
             }
 
             processParsers(mdl, ns, span, req)