You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/06 09:15:14 UTC

[incubator-nlpcraft] branch NLPCRAFT-287 updated (261758b -> 0b6ffaf)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 261758b  WIP.
     new ac555ef  WIP.
     new 6c23bec  WIP.
     new c4d2d15  WIP.
     new 6659315  WIP.
     add fc814d9  Update NCDialogFlowItem.java
     new 0b6ffaf  Merge branch 'master' into NLPCRAFT-287

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/nlpcraft/common/util/NCUtils.scala  |  13 +-
 .../apache/nlpcraft/model/NCDialogFlowItem.java    |   2 +-
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  59 ++++----
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  22 +--
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 148 +++++++++++----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    |   9 +-
 .../model/NCEnricherNestedModelSpec2.scala         |   4 +-
 .../model/NCEnricherNestedModelSpec4.scala         |   3 +
 8 files changed, 135 insertions(+), 125 deletions(-)

[incubator-nlpcraft] 02/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 6c23bec681b90813cfda7d34c4dd6ccb1fe7465a
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Apr 5 22:44:46 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       | 40 ++++++++++++----------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 16 +++++----
 .../model/NCEnricherNestedModelSpec4.scala         |  3 ++
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index fffd476..5324304 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -92,35 +92,37 @@ class NCProbeSynonym(
         require(toks != null)
         require(toks.nonEmpty)
 
-        lazy val buf = mutable.ArrayBuffer.empty[T]
+        lazy val res = mutable.ArrayBuffer.empty[T]
+        lazy val all = mutable.HashSet.empty[T]
+
         var state = 0
 
-        for (chunk ← this if state != -1)
-            toks.find(t ⇒ {
+        for (chunk ← this if state != -1) {
+            val seq =
                 if (state == 0) {
                     state = 1
 
-                    isMatch(t, chunk) && !buf.contains(t)
+                    toks.filter(t ⇒ isMatch(t, chunk))
                 }
                 else
-                    !buf.contains(t) && isMatch(t, chunk)
-            }) match {
-                case Some(t) ⇒
-                    if (!perm && buf.nonEmpty && getIndex(t) <= getIndex(buf.last))
-                        state = -1
-                    else
-                        buf += t
-                case None ⇒ state = -1
-            }
+                    toks.filter(t ⇒ !res.contains(t) && isMatch(t, chunk))
 
-        if (state != -1 &&
-            {
-                val remained = toks.filter(t ⇒ !buf.contains(t))
+            if (seq.nonEmpty) {
+                val head = seq.head
 
-                !this.exists(chunk ⇒ remained.exists(t ⇒ isMatch(t, chunk)))
+                if (!perm && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+                    state = -1
+                else {
+                    res += head
+                    all ++= seq
+                }
             }
-        )
-            Some(buf)
+            else
+                state = -1
+        }
+
+        if (state != -1 && all.size == res.size)
+            Some(res)
         else
             None
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index fec01e5..46506fd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -272,7 +272,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 //      * @param toks
 //      * @param elemId
 //      */
-//    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
+    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
 
     /**
       *
@@ -435,6 +435,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             foundNotSparse = true
                         }
 
+
                         // 1. Simple, sparse.
                         if (firstPhase && sparseEnabled)
                             for (syn ← mdl.sparseSynonyms.getOrElse(elemId, Seq.empty) if !foundSparse)
@@ -479,16 +480,19 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                         // 3. DSL, sparse.
                         if (sparseEnabled)
-                            for (syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty); (_, seq) ← dslCombs; comb ← seq if !foundSparse) {
+                            for (
+                                (_, seq) ← dslCombs;
+                                syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty);
+                                comb ← seq if !foundSparse
+                            ) {
                                 syn.trySparseMatch(comb.map(_.data), req) match {
-                                    case Some(towsRes) ⇒
-                                        addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
+                                    case Some(towsRes) ⇒ addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
                                     case None ⇒ // No-op.
                                 }
                             }
 
+                        // 4. DSL, non sparse.
                         if (notSparseEnabled) {
-                            // 4. DSL, non sparse.
                             for (
                                 (len, seq) ← dslCombs;
                                 syn ← fastAccess(mdl.nonSparseSynonymsDsl, elemId, len).getOrElse(Seq.empty);
@@ -608,7 +612,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             )
 
                             // TODO:
-                            //if (!alreadyMarked(matchedToks, elemId))
+                            if (!alreadyMarked(matchedToks, elemId))
                                 mark(
                                     ns,
                                     elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
index 43320e7..680086d 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -41,6 +41,9 @@ class NCNestedTestModel4 extends NCModelAdapter(
 
     @NCIntent("intent=onE2 term(t1)={tok_id() == 'e2'}[8, 100]")
     def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+    override def isPermutateSynonyms: Boolean = false
+    override def isSparse: Boolean = false
 }
 
 /**

[incubator-nlpcraft] 05/05: Merge branch 'master' into NLPCRAFT-287

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 0b6ffaf5a984fbf28d062eccc1e512930a9a7d80
Merge: 6659315 fc814d9
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Apr 6 12:14:50 2021 +0300

    Merge branch 'master' into NLPCRAFT-287

 nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCDialogFlowItem.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

[incubator-nlpcraft] 03/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit c4d2d15cb4ff94c96105cedfb12a70e4845dd113
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Apr 6 11:25:59 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  51 ++++++-----
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  22 ++---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 100 ++++++++++-----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  22 +++--
 4 files changed, 106 insertions(+), 89 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 5324304..95c526f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -92,37 +92,44 @@ class NCProbeSynonym(
         require(toks != null)
         require(toks.nonEmpty)
 
-        lazy val res = mutable.ArrayBuffer.empty[T]
-        lazy val all = mutable.HashSet.empty[T]
+        if (toks.size >= this.size) {
+            lazy val res = mutable.ArrayBuffer.empty[T]
+            lazy val all = mutable.HashSet.empty[T]
 
-        var state = 0
+            var state = 0
 
-        for (chunk ← this if state != -1) {
-            val seq =
-                if (state == 0) {
-                    state = 1
+            for (chunk ← this if state != -1) {
+                val seq =
+                    if (state == 0) {
+                        state = 1
 
-                    toks.filter(t ⇒ isMatch(t, chunk))
-                }
-                else
-                    toks.filter(t ⇒ !res.contains(t) && isMatch(t, chunk))
+                        toks.filter(t ⇒ isMatch(t, chunk))
+                    }
+                    else
+                        toks.filter(t ⇒ !res.contains(t) && isMatch(t, chunk))
 
-            if (seq.nonEmpty) {
-                val head = seq.head
+                if (seq.nonEmpty) {
+                    val head = seq.head
 
-                if (!perm && res.nonEmpty && getIndex(head) <= getIndex(res.last))
-                    state = -1
-                else {
-                    res += head
-                    all ++= seq
+                    if (!perm && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+                        state = -1
+                    else {
+                        res += head
+                        all ++= seq
+
+                        if (all.size > res.size)
+                            state = -1
+                    }
                 }
+                else
+                    state = -1
             }
+
+            if (state != -1 && all.size == res.size)
+                Some(res)
             else
-                state = -1
+                None
         }
-
-        if (state != -1 && all.size == res.size)
-            Some(res)
         else
             None
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index cdfdf89..03c59ff 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -65,17 +65,19 @@ object NCModelManager extends NCService with DecorateAsScala {
                 val elmCnt = w.elements.keySet.size
                 val intentCnt = w.intents.size
 
+                def getWithWarning(i: Int): String = if (i == 0) s"0 ${r("(!)")}" else i.toString
+
                 tbl += Seq(
-                    s"Name:                  ${bo(c(mdl.getName))}",
-                    s"ID:                    ${bo(mdl.getId)}",
-                    s"Version:               ${mdl.getVersion}",
-                    s"Origin:                ${mdl.getOrigin}",
-                    s"Elements:              $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}" else ""),
-                    s"Synonyms:              $synCnt" + (if (synCnt == 0) s" ${r("(!)")}" else ""),
-                    s"Synonyms(DSL):         $synDslCnt" + (if (synDslCnt == 0) s" ${r("(!)")}" else ""),
-                    s"Synonyms(Sparse):      $synSparseCnt" + (if (synSparseCnt == 0) s" ${r("(!)")}" else ""),
-                    s"Synonyms(Sparse, DSL): $synSparseDslCnt" + (if (synSparseDslCnt == 0) s" ${r("(!)")}" else ""),
-                    s"Intents:               $intentCnt" + (if (intentCnt == 0) s" ${r("(!)")}" else "")
+                    s"Name:                      ${bo(c(mdl.getName))}",
+                    s"ID:                        ${bo(mdl.getId)}",
+                    s"Version:                   ${mdl.getVersion}",
+                    s"Origin:                    ${mdl.getOrigin}",
+                    s"Elements:                  ${getWithWarning(elmCnt)}",
+                    s"Synonyms(Continuous)       $synCnt",
+                    s"Synonyms(Continuous, DSL): $synDslCnt",
+                    s"Synonyms(Sparse):          $synSparseCnt",
+                    s"Synonyms(Sparse, DSL):     $synSparseDslCnt",
+                    s"Intents:                   ${getWithWarning(intentCnt)}"
                 )
             })
         }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 46506fd..f9acd95 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -296,11 +296,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             else None
         }
 
-    private def mkCache(): mutable.Map[String, ArrayBuffer[Seq[NlpToken]]] =
+    private def mkCache(): mutable.Map[String, ArrayBuffer[Seq[Int]]] =
         mutable.HashMap.empty[
             String,
-            mutable.ArrayBuffer[Seq[NlpToken]]
-        ].withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[NlpToken]])
+            mutable.ArrayBuffer[Seq[Int]]
+        ].withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[Int]])
 
     private def convert(tows: Seq[NCDslContent], ns: NCNlpSentence): Seq[NlpToken] =
         (
@@ -388,15 +388,17 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             ) {
                 _ ⇒
                 for (toks ← combos(ns)) {
-                    val idxsSeq = toks.flatMap(tokIdxs)
-                    val idxsSorted = idxsSeq.sorted
-                    val idxs = idxsSeq.toSet
-                    val idxMin = idxsSorted.head
-                    val idxMax = idxsSorted.last
+                    val indexes = toks.map(_.index)
 
-                    lazy val sorted = idxsSorted.zipWithIndex.toMap
+                    lazy val dslCombs: Map[Int, Seq[Seq[Complex]]] = {
+                        val idxsSeq = toks.flatMap(tokIdxs)
+                        val idxsSorted = idxsSeq.sorted
+                        val idxs = idxsSeq.toSet
+                        val idxMin = idxsSorted.head
+                        val idxMax = idxsSorted.last
+
+                        lazy val sorted = idxsSorted.zipWithIndex.toMap
 
-                    lazy val dslCombs: Map[Int, Seq[Seq[Complex]]] =
                         complexes.par.
                             flatMap(complexSeq ⇒ {
                                 val rec = complexSeq.tokensComplexes.filter(_.isSubsetOf(idxMin, idxMax, idxs))
@@ -412,54 +414,41 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                     None
                             }).
                             map(_.sortBy(p ⇒ sorted(p.wordIndexes.head))).seq.groupBy(_.length)
+                    }
 
                     lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                     // Attempt to match each element.
                     for (elm ← mdl.elements.values) {
                         val elemId = elm.getId
-                        val sparseEnabled = !cacheSparse(elemId).exists(_.contains(toks))
-                        val notSparseEnabled = !cacheNotSparse(elemId).exists(_.contains(toks))
-                        var foundSparse = false
-                        var foundNotSparse = false
+                        val sparseEnabled = !cacheSparse(elemId).exists(_.containsSlice(indexes))
+                        val notSparseEnabled = !cacheNotSparse(elemId).exists(_.containsSlice(indexes))
+                        var found = false
 
                         def addSparse(res: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData]): Unit = {
                             addMatch(elm, res, syn, parts)
-                            cacheSparse(elemId) += toks
-                            foundSparse = true
+                            cacheSparse(elemId) += indexes
+                            found = true
                         }
 
                         def addNotSparse(syn: NCProbeSynonym, parts: Seq[TokenData]): Unit = {
                             addMatch(elm, toks, syn, parts)
-                            cacheNotSparse(elemId) += toks
-                            foundNotSparse = true
+                            cacheNotSparse(elemId) += indexes
+                            found = true
                         }
 
-
-                        // 1. Simple, sparse.
-                        if (firstPhase && sparseEnabled)
-                            for (syn ← mdl.sparseSynonyms.getOrElse(elemId, Seq.empty) if !foundSparse)
-                                syn.trySparseMatch(toks) match {
-                                    case Some(res) ⇒ addSparse(res, syn, Seq.empty)
-                                    case None ⇒ // No-op.
-                                }
-
-                        // 2. Simple, not sparse.
-                        // Optimization - plain synonyms can be used only on first iteration
-                        if (firstPhase && notSparseEnabled)
+                        // 1. Simple, not sparse.
+                        if (firstPhase && notSparseEnabled && !found)
                             fastAccess(mdl.nonSparseSynonyms, elemId, toks.length) match {
                                 case Some(h) ⇒
                                     def tryMap(synsMap: Map[String, NCProbeSynonym], notFound: () ⇒ Unit): Unit =
                                         synsMap.get(tokStems) match {
                                             case Some(syn) ⇒ addNotSparse(syn, Seq.empty)
-                                                // TODO:
-                                                //if (!found)
-                                                //   notFound()
                                             case None ⇒ notFound()
                                         }
 
                                     def tryScan(synsSeq: Seq[NCProbeSynonym]): Unit =
-                                        for (syn ← synsSeq if !foundNotSparse)
+                                        for (syn ← synsSeq if !found)
                                             if (syn.isMatch(toks))
                                                 addNotSparse(syn, Seq.empty)
 
@@ -468,7 +457,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                         () ⇒ {
                                             tryScan(h.notTxtDirectSynonyms)
 
-                                            if (!foundNotSparse)
+                                            if (!found)
                                                 tryMap(
                                                     h.txtNotDirectSynonyms,
                                                     () ⇒ tryScan(h.notTxtNotDirectSynonyms)
@@ -478,30 +467,38 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 case None ⇒ // No-op.
                             }
 
-                        // 3. DSL, sparse.
-                        if (sparseEnabled)
-                            for (
-                                (_, seq) ← dslCombs;
-                                syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty);
-                                comb ← seq if !foundSparse
-                            ) {
-                                syn.trySparseMatch(comb.map(_.data), req) match {
-                                    case Some(towsRes) ⇒ addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
-                                    case None ⇒ // No-op.
-                                }
-                            }
-
-                        // 4. DSL, non sparse.
-                        if (notSparseEnabled) {
+                        // 2. DSL, non sparse.
+                        if (notSparseEnabled && mdl.nonSparseSynonymsDsl.nonEmpty && !found) {
                             for (
                                 (len, seq) ← dslCombs;
                                 syn ← fastAccess(mdl.nonSparseSynonymsDsl, elemId, len).getOrElse(Seq.empty);
-                                comb ← seq if !foundNotSparse
+                                comb ← seq if !found
                             ) {
                                 if (syn.isMatch(comb.map(_.data), req))
                                     addNotSparse(syn, getPartsComplex(comb, syn))
                             }
                         }
+
+                        // 3. Simple, sparse.
+                        if (firstPhase && sparseEnabled && !found)
+                            for (syn ← mdl.sparseSynonyms.getOrElse(elemId, Seq.empty) if !found)
+                                syn.trySparseMatch(toks) match {
+                                    case Some(res) ⇒ addSparse(res, syn, Seq.empty)
+                                    case None ⇒ // No-op.
+                                }
+
+                        // 4. DSL, sparse.
+                        if (sparseEnabled && mdl.sparseSynonymsDsl.nonEmpty && !found)
+                            for (
+                                syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty);
+                                (_, seq) ← dslCombs;
+                                comb ← seq if !found
+                            ) {
+                                syn.trySparseMatch(comb.map(_.data), req) match {
+                                    case Some(towsRes) ⇒ addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
+                                    case None ⇒ // No-op.
+                                }
+                            }
                     }
                 }
             }
@@ -529,6 +526,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
             val matchCnt = matchesNorm.size
 
+
             // TODO:matchesNorm
             // Add notes for all remaining (non-intersecting) matches.
             for ((m, idx) ← matches.zipWithIndex) {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index ad66b8f..a938f59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -24,9 +24,9 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
 import org.apache.nlpcraft.common.{NCE, NCService, U}
 import org.apache.nlpcraft.model.NCModel
 
-import java.io.{Serializable ⇒ JSerializable}
+import java.io.{Serializable => JSerializable}
 import java.util
-import java.util.{List ⇒ JList}
+import java.util.{List => JList}
 import scala.collection.JavaConverters.{asScalaBufferConverter, _}
 import scala.collection.{Map, Seq, mutable}
 import scala.language.implicitConversions
@@ -37,6 +37,8 @@ import scala.language.implicitConversions
 object NCSentenceManager extends NCService {
     @volatile private var pool: java.util.concurrent.ForkJoinPool = _
 
+    private val cache = U.mkLRUMap[Seq[Set[NCNlpSentenceNote]], util.List[util.List[NCNlpSentenceNote]]]("sentence-combinations-cache", 500)
+
     case class PartKey(id: String, start: Int, end: Int) {
         require(start <= end)
 
@@ -197,7 +199,7 @@ object NCSentenceManager extends NCService {
       * @param noteField
       * @param ns
       */
-    private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = {
+    private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit =
         ns.flatMap(_.getNotes(note)).foreach(rel ⇒
             rel.dataOpt[JList[JList[Int]]](idxsField) match {
                 case Some(idxsList) ⇒
@@ -211,7 +213,6 @@ object NCSentenceManager extends NCService {
                 case None ⇒ // No-op.
             }
         )
-    }
 
     /**
       * Copies token.
@@ -679,14 +680,23 @@ object NCSentenceManager extends NCService {
 
         var sens =
             if (delCombs.nonEmpty) {
-                val toksByIdx =
+                val toksByIdx: Seq[Set[NCNlpSentenceNote]] =
                     delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                         groupBy { case (idx, _) ⇒ idx }.
                         map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                         toSeq.sortBy(-_.size)
 
+
+                var combs: JList[JList[NCNlpSentenceNote]] = cache.get(toksByIdx)
+
+                if (combs == null) {
+                    combs = NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool)
+
+                    cache.put(toksByIdx, combs)
+                }
+
                 val seqSens =
-                    NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala).
+                    combs.asScala.map(_.asScala).
                         par.
                         flatMap(delComb ⇒ {
                             val nsClone = sen.clone()

[incubator-nlpcraft] 01/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit ac555ef138c430e897d4fda09a03e31fcf2f70df
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Apr 5 21:24:05 2021 +0300

    WIP.
---
 .../scala/org/apache/nlpcraft/common/util/NCUtils.scala     | 13 ++++++-------
 .../org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala     |  7 +------
 .../probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala    |  3 +--
 .../nlp/enrichers/model/NCEnricherNestedModelSpec2.scala    |  4 ++--
 4 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 23ca22b..141e813 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -1424,13 +1424,12 @@ object NCUtils extends LazyLogging {
      * @param e
      */
     def prettyError(logger: Logger, title: String, e: Throwable): Unit = {
-        e.printStackTrace()
-//        // Keep the full trace in the 'trace' log level.
-//        logger.trace(title, e)
-//
-//        prettyErrorImpl(new PrettyErrorLogger {
-//            override def log(s: String): Unit = logger.error(s)
-//        }, title, e)
+        // Keep the full trace in the 'trace' log level.
+        logger.trace(title, e)
+
+        prettyErrorImpl(new PrettyErrorLogger {
+            override def log(s: String): Unit = logger.error(s)
+        }, title, e)
     }
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 6960fec..fffd476 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -113,11 +113,7 @@ class NCProbeSynonym(
                 case None ⇒ state = -1
             }
 
-
-        if (state != -1
-            &&
-//            buf.contains(toks.head) &&
-//            buf.contains(toks.last) &&
+        if (state != -1 &&
             {
                 val remained = toks.filter(t ⇒ !buf.contains(t))
 
@@ -127,7 +123,6 @@ class NCProbeSynonym(
             Some(buf)
         else
             None
-
     }
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 102d48c..fec01e5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,9 +19,8 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
 import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.impl.NCTokenLogger
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index 82b6686..ede9153 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -47,9 +47,9 @@ class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl", "N
 class NCEnricherNestedModelSpec21 extends NCTestContext {
     @Test
     def test(): Unit = {
-        //checkIntent("word", "onE1")
+        checkIntent("word", "onE1")
         checkIntent("10 word", "onE1")
-        //checkIntent("11 12 word", "onNumAndE1")
+        checkIntent("11 12 word", "onNumAndE1")
     }
 }
 

[incubator-nlpcraft] 04/05: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 66593155e7f4fbf96aa694afa6a0675096ce3166
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Apr 6 12:14:45 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 61 +++++++++++-----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 13 +----
 3 files changed, 34 insertions(+), 43 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 95c526f..c54b347 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -114,11 +114,12 @@ class NCProbeSynonym(
                     if (!perm && res.nonEmpty && getIndex(head) <= getIndex(res.last))
                         state = -1
                     else {
-                        res += head
                         all ++= seq
 
                         if (all.size > res.size)
                             state = -1
+                        else
+                            res += head
                     }
                 }
                 else
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index f9acd95..30f5084 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,14 +19,14 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym ⇒ Synonym, NCProbeVariants}
 
 import java.io.Serializable
 import java.util
@@ -39,8 +39,9 @@ import scala.collection.{Map, Seq, mutable}
   * Model elements enricher.
   */
 object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
-    type TokenData = (NCToken, NCSynonymChunkKind)
-    
+    type TokType = (NCToken, NCSynonymChunkKind)
+    type Cache = mutable.Map[String, ArrayBuffer[Seq[Int]]]
+
     object Complex {
         def apply(t: NCToken): Complex =
             Complex(
@@ -128,8 +129,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     case class ElementMatch(
         element: NCElement,
         tokens: Seq[NlpToken],
-        synonym: NCProbeSynonym,
-        parts: Seq[TokenData]
+        synonym: Synonym,
+        parts: Seq[TokType]
     ) extends Ordered[ElementMatch] {
         // Tokens sparsity.
         lazy val sparsity = U.calcSparsity(tokens.map(_.index))
@@ -197,9 +198,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         elem: NCElement,
         toks: Seq[NlpToken],
         direct: Boolean,
-        syn: Option[NCProbeSynonym],
+        syn: Option[Synonym],
         metaOpt: Option[Map[String, Object]],
-        parts: Seq[TokenData]
+        parts: Seq[TokType]
     ): Unit = {
         val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
 
@@ -279,7 +280,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param comb
       * @param syn
       */
-    private def getPartsComplex(comb: Seq[Complex], syn: NCProbeSynonym): Seq[TokenData] =
+    private def getPartsComplex(comb: Seq[Complex], syn: Synonym): Seq[TokType] =
         comb.zip(syn.map(_.kind)).flatMap {
             case (complex, kind) ⇒ if (complex.isToken) Some(complex.token → kind)
             else None
@@ -290,19 +291,18 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param comb
       * @param syn
       */
-    private def getPartsContent(comb: Seq[NCDslContent], syn: NCProbeSynonym): Seq[TokenData] =
+    private def toParts(comb: Seq[NCDslContent], syn: Synonym): Seq[TokType] =
         comb.zip(syn.map(_.kind)).flatMap {
-            case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get → kind)
-            else None
+            case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get → kind) else None
         }
 
-    private def mkCache(): mutable.Map[String, ArrayBuffer[Seq[Int]]] =
+    private def mkCache(): Cache =
         mutable.HashMap.empty[
             String,
             mutable.ArrayBuffer[Seq[Int]]
         ].withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[Int]])
 
-    private def convert(tows: Seq[NCDslContent], ns: NCNlpSentence): Seq[NlpToken] =
+    private def toNlpTokens(tows: Seq[NCDslContent], ns: NCNlpSentence): Seq[NlpToken] =
         (
             tows.filter(_.isRight).map(_.right.get) ++
                 tows.filter(_.isLeft).map(_.left.get).
@@ -325,7 +325,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             val cacheSparse = mkCache()
             val cacheNotSparse = mkCache()
 
-            def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData]): Unit = {
+            def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: Synonym, parts: Seq[TokType]): Unit = {
                 val toksSet = toks.toSet
 
                 // TODO:
@@ -419,35 +419,36 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                     // Attempt to match each element.
-                    for (elm ← mdl.elements.values) {
-                        val elemId = elm.getId
-                        val sparseEnabled = !cacheSparse(elemId).exists(_.containsSlice(indexes))
-                        val notSparseEnabled = !cacheNotSparse(elemId).exists(_.containsSlice(indexes))
+                    for (
+                        elm ← mdl.elements.values;
+                        elemId = elm.getId;
+                        sparseEnabled = !cacheSparse(elemId).exists(_.containsSlice(indexes));
+                        notSparseEnabled = !cacheNotSparse(elemId).exists(_.containsSlice(indexes))
+
+                        if !alreadyMarked(toks, elm.getId) && (sparseEnabled || notSparseEnabled)
+                    ) {
                         var found = false
 
-                        def addSparse(res: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData]): Unit = {
-                            addMatch(elm, res, syn, parts)
-                            cacheSparse(elemId) += indexes
+                        def add(cache: Cache, res: Seq[NlpToken], s: Synonym, parts: Seq[TokType]): Unit = {
+                            addMatch(elm, res, s, parts)
+                            cache(elemId) += indexes
                             found = true
                         }
 
-                        def addNotSparse(syn: NCProbeSynonym, parts: Seq[TokenData]): Unit = {
-                            addMatch(elm, toks, syn, parts)
-                            cacheNotSparse(elemId) += indexes
-                            found = true
-                        }
+                        def addSparse(res: Seq[NlpToken], s: Synonym, parts: Seq[TokType]): Unit = add(cacheSparse, res, s, parts)
+                        def addNotSparse(s: Synonym, parts: Seq[TokType]): Unit = add(cacheNotSparse, toks,  s, parts)
 
                         // 1. Simple, not sparse.
                         if (firstPhase && notSparseEnabled && !found)
                             fastAccess(mdl.nonSparseSynonyms, elemId, toks.length) match {
                                 case Some(h) ⇒
-                                    def tryMap(synsMap: Map[String, NCProbeSynonym], notFound: () ⇒ Unit): Unit =
+                                    def tryMap(synsMap: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
                                         synsMap.get(tokStems) match {
                                             case Some(syn) ⇒ addNotSparse(syn, Seq.empty)
                                             case None ⇒ notFound()
                                         }
 
-                                    def tryScan(synsSeq: Seq[NCProbeSynonym]): Unit =
+                                    def tryScan(synsSeq: Seq[Synonym]): Unit =
                                         for (syn ← synsSeq if !found)
                                             if (syn.isMatch(toks))
                                                 addNotSparse(syn, Seq.empty)
@@ -495,7 +496,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 comb ← seq if !found
                             ) {
                                 syn.trySparseMatch(comb.map(_.data), req) match {
-                                    case Some(towsRes) ⇒ addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
+                                    case Some(towsRes) ⇒ addSparse(toNlpTokens(towsRes, ns), syn, toParts(towsRes, syn))
                                     case None ⇒ // No-op.
                                 }
                             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index a938f59..fb676d0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -37,8 +37,6 @@ import scala.language.implicitConversions
 object NCSentenceManager extends NCService {
     @volatile private var pool: java.util.concurrent.ForkJoinPool = _
 
-    private val cache = U.mkLRUMap[Seq[Set[NCNlpSentenceNote]], util.List[util.List[NCNlpSentenceNote]]]("sentence-combinations-cache", 500)
-
     case class PartKey(id: String, start: Int, end: Int) {
         require(start <= end)
 
@@ -686,17 +684,8 @@ object NCSentenceManager extends NCService {
                         map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                         toSeq.sortBy(-_.size)
 
-
-                var combs: JList[JList[NCNlpSentenceNote]] = cache.get(toksByIdx)
-
-                if (combs == null) {
-                    combs = NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool)
-
-                    cache.put(toksByIdx, combs)
-                }
-
                 val seqSens =
-                    combs.asScala.map(_.asScala).
+                    NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala).
                         par.
                         flatMap(delComb ⇒ {
                             val nsClone = sen.clone()