You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/09 11:26:32 UTC

[incubator-nlpcraft] branch NLPCRAFT-287 updated (bbeecf6 -> 8a27348)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from bbeecf6  WIP.
     new a349b6f  WIP.
     add 05c65e9  Update NCModelView.java
     add 98d26e2  Update NCIdlCompilerBase.scala
     new f00be1e  Merge branch 'master' into NLPCRAFT-287
     new 8a27348  WIP.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  25 ++--
 .../org/apache/nlpcraft/model/NCModelView.java     |   3 +-
 .../apache/nlpcraft/model/impl/NCTokenImpl.scala   |   8 +-
 .../model/intent/compiler/NCIdlCompilerBase.scala  |   4 +
 .../probe/mgrs/deploy/NCDeployManager.scala        |   4 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 -
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 151 +++++++++++----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  15 +-
 .../nlpcraft/model/sparse/NCSparseSpec.scala       |  15 +-
 9 files changed, 127 insertions(+), 100 deletions(-)

[incubator-nlpcraft] 02/03: Merge branch 'master' into NLPCRAFT-287

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit f00be1eee670e858bfc7cb1fd4a29c714c27433a
Merge: a349b6f 98d26e2
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Apr 9 14:10:15 2021 +0300

    Merge branch 'master' into NLPCRAFT-287

 nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java    | 3 +--
 .../org/apache/nlpcraft/model/intent/compiler/NCIdlCompilerBase.scala | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)


[incubator-nlpcraft] 01/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit a349b6fe1baa5d6069f725f232b32146efb21873
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Apr 9 14:09:52 2021 +0300

    WIP.
---
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  25 ++--
 .../apache/nlpcraft/model/impl/NCTokenImpl.scala   |   8 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 144 +++++++++++----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  15 ++-
 .../nlpcraft/model/sparse/NCSparseSpec.scala       |  15 ++-
 5 files changed, 118 insertions(+), 89 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index c0923ae..9adbe01 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -23,12 +23,13 @@ import org.apache.nlpcraft.common.ascii._
 import scala.collection.JavaConverters._
 import scala.collection.{Seq, Set, mutable}
 import scala.language.implicitConversions
+import java.io.{Serializable ⇒ JSerializable}
 
 /**
   * Sentence token note is a typed map of KV pairs.
   *
   */
-class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) extends java.io.Serializable with NCAsciiLike {
+class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends JSerializable with NCAsciiLike {
     import NCNlpSentenceNote._
 
     @transient
@@ -75,7 +76,7 @@ class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) e
         )
 
     override def clone(): NCNlpSentenceNote = {
-        val m = mutable.Map.empty[String, java.io.Serializable] ++ values
+        val m = mutable.Map.empty[String, JSerializable] ++ values
 
         new NCNlpSentenceNote(m.toMap)
     }
@@ -91,20 +92,20 @@ class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) e
       *
       * @return
       */
-    def skipNlp(): Map[String, java.io.Serializable] =
+    def skipNlp(): Map[String, JSerializable] =
         values.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key != "noteType" }
 
     /**
       *
       */
-    def asMetadata(): Map[String, java.io.Serializable] =
+    def asMetadata(): Map[String, JSerializable] =
         if (isUser)
             values.get("meta") match {
-                case Some(meta) ⇒ meta.asInstanceOf[Map[String, java.io.Serializable]]
-                case None ⇒ Map.empty[String, java.io.Serializable]
+                case Some(meta) ⇒ meta.asInstanceOf[Map[String, JSerializable]]
+                case None ⇒ Map.empty[String, JSerializable]
             }
         else {
-            val md = mutable.Map.empty[String, java.io.Serializable]
+            val md = mutable.Map.empty[String, JSerializable]
 
             val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
 
@@ -117,8 +118,8 @@ class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) e
      *
      * @param kvs
      */
-    def clone(kvs : (String, java.io.Serializable)*): NCNlpSentenceNote = {
-        val m = mutable.HashMap.empty[String, java.io.Serializable] ++ values
+    def clone(kvs : (String, JSerializable)*): NCNlpSentenceNote = {
+        val m = mutable.HashMap.empty[String, JSerializable] ++ values
 
         kvs.foreach(kv ⇒ m += kv._1 → kv._2)
 
@@ -206,7 +207,7 @@ object NCNlpSentenceNote {
     /**
      * To immutable map.
      */
-    implicit def values(note: NCNlpSentenceNote): Map[String, java.io.Serializable] = note.values
+    implicit def values(note: NCNlpSentenceNote): Map[String, JSerializable] = note.values
 
     /**
       * Creates new note with given parameters.
@@ -228,7 +229,7 @@ object NCNlpSentenceNote {
         val (sparsity, tokMinIndex, tokMaxIndex, tokWordIndexes, len) = calc(wordIndexesOpt.getOrElse(indexes))
 
         new NCNlpSentenceNote(
-            mutable.HashMap[String, java.io.Serializable]((
+            mutable.HashMap[String, JSerializable]((
             params.filter(_._2 != null) :+
                ("noteType" → typ) :+
                ("tokMinIndex" → indexes.min) :+
@@ -240,7 +241,7 @@ object NCNlpSentenceNote {
                ("wordLength" → len) :+
                ("sparsity" → sparsity) :+
                ("contiguous" → (sparsity == 0))
-            ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]): _*).toMap
+            ).map(p ⇒ p._1 → p._2.asInstanceOf[JSerializable]): _*).toMap
         )
     }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 017ead1..8c5005a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -17,7 +17,7 @@
 
 package org.apache.nlpcraft.model.impl
 
-import java.io.Serializable
+import java.io.{Serializable ⇒ JSerializable}
 import java.util.Collections
 
 import org.apache.nlpcraft.common._
@@ -50,7 +50,7 @@ private[nlpcraft] class NCTokenImpl(
     endCharIndex: Int,
     meta: Map[String, Object],
     isAbstractProp: Boolean
-) extends NCToken with Serializable {
+) extends NCToken with JSerializable {
     require(mdl != null)
     require(srvReqId != null)
     require(id != null)
@@ -106,7 +106,7 @@ private[nlpcraft] object NCTokenImpl {
         // nlpcraft:nlp and some optional (after collapsing).
         require(tok.size <= 2, s"Unexpected token [size=${tok.size}, token=$tok]")
 
-        val md = mutable.HashMap.empty[String, java.io.Serializable]
+        val md = mutable.HashMap.empty[String, JSerializable]
 
         tok.foreach(n ⇒ {
             val id = n.noteType.toLowerCase
@@ -142,7 +142,7 @@ private[nlpcraft] object NCTokenImpl {
                 // Special synthetic meta data element.
                 md.put("nlpcraft:nlp:freeword", false)
 
-                elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k, v.asInstanceOf[java.io.Serializable]) }
+                elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k, v.asInstanceOf[JSerializable]) }
 
                 new NCTokenImpl(
                     mdl.model,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 0ec40cd..d668c02 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -130,13 +130,21 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
     case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq])
 
-    // Found-by-synonym model element.
+    /**
+      * Found-by-synonym model element.
+      *
+      * @param element Element.
+      * @param tokens Element tokens.
+      * @param synonym Synonyms.
+      * @param parts Parts for DSL synonyms.
+      * @param allToksIdxs All tokens indexes (whole tokens slice, has sense for sparse tokens)
+      */
     case class ElementMatch(
         element: NCElement,
         tokens: Seq[NlpToken],
         synonym: Synonym,
         parts: Seq[TokType],
-        tokIdxs: Seq[Int]
+        allToksIdxs: Seq[Int]
     ) extends Ordered[ElementMatch] {
         // Tokens sparsity.
         lazy val sparsity: Int = U.calcSparsity(tokens.map(_.index))
@@ -206,7 +214,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param syn
       * @param metaOpt
       * @param parts
-      * @param toksIdxs
+      * @param allToksIdxs
       */
     private def mark(
         ns: NCNlpSentence,
@@ -216,16 +224,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         syn: Option[Synonym],
         metaOpt: Option[Map[String, Object]],
         parts: Seq[TokType],
-        toksIdxs: Seq[Int]
+        allToksIdxs: Seq[Int]
     ): Unit = {
         val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
 
         // For system elements.
         params += "direct" → direct.asInstanceOf[AnyRef]
 
-        val toksIdxsJava: JList[Int] = toksIdxs.asJava
-
-        params += "allToksIndexes" → toksIdxsJava
+        // Internal usage.
+        params += "allToksIndexes" → allToksIdxs.asJava
 
         syn match {
             case Some(s) ⇒
@@ -334,6 +341,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             ).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w"))
                         )
 
+                        // Checks element's tokens.
                         if (!alreadyMarked(matchedToks, elemId))
                             mark(
                                 ns,
@@ -379,17 +387,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param toks
       * @param elemId
       */
-    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean =
-        toks.forall(_.isTypeOf(elemId)) ||
-        toks.flatten.exists(n ⇒
-            n.noteType == elemId &&
-            (
-                n.dataOpt("allToksIndexes").asInstanceOf[Option[JList[Int]]] match {
-                    case Some(idxs) ⇒ idxs.asScala.containsSlice(toks.map(_.index))
-                    case None ⇒ false
-                }
-            )
-        )
+    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = {
+        def hasIndex(n: NCNlpSentenceNote): Boolean =
+            n.dataOpt("allToksIndexes").asInstanceOf[Option[JList[Int]]] match {
+                case Some(idxs) ⇒ idxs.asScala.containsSlice(toks.map(_.index))
+                case None ⇒ false
+            }
+
+         toks.flatten.exists(n ⇒ n.noteType == elemId && hasIndex(n))
+    }
 
     /**
       *
@@ -519,39 +525,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             }).seq
     }
 
-    /**
-      *
-      * @param ns
-      * @param mdlId
-      * @param matches
-      */
-    private def processMatches(ns: NCNlpSentence, mdlId: String, matches: Seq[ElementMatch]): Unit = {
-        // TODO:matchesNorm
-        // Add notes for all remaining (non-intersecting) matches.
-        for ((m, idx) ← matches.zipWithIndex) {
-            if (DEEP_DEBUG)
-                logger.trace(
-                    s"Model '$mdlId' element found (${idx + 1} of ${matches.size}) [" +
-                        s"elementId=${m.element.getId}, " +
-                        s"synonym=${m.synonym}, " +
-                        s"tokens=${tokString(m.tokens)}" +
-                        s"]"
-                )
-
-            val tokIdxs = m.tokens.map(_.index)
-            val direct = m.synonym.isDirect && (tokIdxs == tokIdxs.sorted)
-
-            // TODO:
-            if (!alreadyMarked(m.tokens, m.element.getId)) {
-                mark(ns, m.element, m.tokens, direct, syn = Some(m.synonym), metaOpt = None, m.parts, m.tokIdxs)
-
-                println(s"SET: ${m.element.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
-            }
-            else
-                println(s"NOT SET: ${m.element.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
-        }
-    }
-
     @throws[NCE]
     override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
         require(isStarted)
@@ -561,7 +534,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
         startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒
             val req = NCRequestImpl(senMeta, srvReqId)
-            val h = mkComplexes(mdl, ns)
+            lazy val h = mkComplexes(mdl, ns)
 
             startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
                 var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
@@ -571,9 +544,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 val combosToks = combos(ns)
 
                 def go(): Unit = {
-                    println
-                    println(s"GO $state")
-
                     val matches = mutable.ArrayBuffer.empty[ElementMatch]
 
                     val cacheSparse = mkCache(mdl)
@@ -582,22 +552,30 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                     var found = false
 
-                    def add(typ: String, elm: NCElement, cache: Cache, res: Seq[NlpToken], tokIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
+                    def add(typ: String, elm: NCElement, cache: Cache, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
                         var added = false
 
                         if (!matchExist(elm.getId, res)) {
-                            matches += ElementMatch(elm, res, s, parts, tokIdxs)
+                            matches += ElementMatch(elm, res, s, parts, allToksIdxs)
 
                             added = true
                         }
 
-                        cache(elm.getId) += tokIdxs
+                        cache(elm.getId) += allToksIdxs
                         found = true
 
-                        println(s"ADDED: ${elm.getId}, type=$typ, res=${res.map(_.origText).mkString("|")}, toks=${tokIdxs.mkString("|")}, added=$added")
+                        if (DEEP_DEBUG)
+                            logger.trace(
+                                s"Found element [" +
+                                    s"id=${elm.getId}, " +
+                                    s"type=$typ, " +
+                                    s"indexes=${res.map(_.index).mkString("|")}, " +
+                                    s"allTokensIndexes=${allToksIdxs.mkString("|")}, " +
+                                    s"added=$added" +
+                                    s"]"
+                            )
                     }
 
-                    // TODO:
                     def matchExist(elemId: String, toks: Seq[NlpToken]): Boolean =
                         matches.exists(m ⇒ m.element.getId == elemId && toks.toSet.subsetOf(m.tokensSet))
 
@@ -607,15 +585,16 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                         lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                         // Attempt to match each element.
-                        // TODO: alreadyMarked - может быть найдено тоже самое но отмечено меньше (как это сразу не рассматривать?)
                         for (
                             elm ← mdl.elements.values;
                             elemId = elm.getId;
                             dirProc = cacheDirect(elemId).exists(_.containsSlice(tokIdxs));
                             sparseProc = cacheSparse(elemId).exists(_.containsSlice(tokIdxs))
-                            if (!dirProc || !sparseProc) && !alreadyMarked(toks, elemId) && !matchExist(elemId, toks)
+                            if
+                                (!dirProc || !sparseProc) &&
+                                 // Checks whole tokens slice.
+                                !alreadyMarked(toks, elemId) && !matchExist(elemId, toks)
                         ) {
-                            //println(s"State=$elemId, dirProc=$dirProc, sparseProc=$sparseProc, cacheSparse(elemId)="+cacheSparse(elemId).mkString("|"))
                             // 1. SIMPLE.
                             found = false
 
@@ -662,9 +641,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                     }
 
                             // 2. DSL.
-                            val dslEnabled = state != SIMPLE
-
-                            if (dslEnabled && mdl.synonymsDsl.nonEmpty) {
+                            if (state != SIMPLE && mdl.synonymsDsl.nonEmpty) {
                                 found = false
 
                                 // 2.1 Sparse.
@@ -691,9 +668,42 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                         }
                     }
 
-                    processMatches(ns, mdlId, matches)
+                    for ((m, idx) ← matches.zipWithIndex) {
+                        if (DEEP_DEBUG)
+                            logger.trace(
+                                s"Model '$mdlId' element found (${idx + 1} of ${matches.size}) [" +
+                                    s"elementId=${m.element.getId}, " +
+                                    s"synonym=${m.synonym}, " +
+                                    s"tokens=${tokString(m.tokens)}" +
+                                    s"]"
+                            )
+
+                        val tokIdxs = m.tokens.map(_.index)
+                        val direct = m.synonym.isDirect && !tokIdxs.zip(tokIdxs.tail).exists { case (x, y) ⇒ x > y }
+
+                        var added = false
+
+                        // Checks element's tokens.
+                        if (!alreadyMarked(m.tokens, m.element.getId)) {
+                            mark(ns, m.element, m.tokens, direct, syn = Some(m.synonym), metaOpt = None, m.parts, m.allToksIdxs)
+
+                            added = true
+                        }
+
+                        if (DEEP_DEBUG)
+                            logger.trace(
+                                s"Element ${if (added) "added" else "skipped"} [" +
+                                    s"id=${m.element.getId}, " +
+                                    s"indexes=${m.tokens.map(_.index).mkString("|")}, " +
+                                    s"allTokensIndexes=${m.allToksIdxs.mkString("|")}, " +
+                                    s"]"
+                            )
+                    }
                 }
 
+                if (DEEP_DEBUG)
+                    logger.trace(s"Exexucution started with state: $state")
+
                 go()
 
                 if (state == SIMPLE) {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 2776677..541966a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -733,9 +733,22 @@ object NCSentenceManager extends NCService {
             )
         )
 
+        def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp)
+
+        // Drops similar sentences (with same notes structure). Keeps with more found.
+        sens = sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct).
+            flatMap(p ⇒ {
+                val m: Map[NCNlpSentence, Int] = p._2.map(p ⇒ p → notNlpNotes(p).size).toMap
+
+                val max = m.values.max
+
+                m.filter(_._2 == max).keys
+            }).
+            toSeq
+
         // Drops similar sentences (with same tokens structure).
         // Among similar sentences we prefer one with minimal free words count.
-        sens.groupBy(_.flatten.filter(!_.isNlp).map(_.getKey(withIndexes = false))).
+        sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
             map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
             toSeq
     }
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
index 37df085..8441532 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
@@ -36,17 +36,20 @@ class NCSparseModel extends NCAbstractTokensModel {
         val variants = ctx.getVariants.asScala
 
         def checkOneVariant(sparsity: Int): Unit = {
-            require(variants.size == 1)
+            require(variants.size == 1, "There is should be single variant.")
 
             val toks = variants.head.asScala.filter(_.getId == "xyz")
 
-            require(toks.size == 3)
+            require(toks.size == 3, "There are should be 3 `xyz` tokens.")
 
             checkSparsity(sparsity, toks)
         }
 
         def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
-            require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int] == sparsity))
+            require(
+                toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int] == sparsity),
+                s"Sparsity of each tokens should be: $sparsity."
+            )
 
         def checkExists(sparsity: Int): Unit =
             require(
@@ -58,9 +61,11 @@ class NCSparseModel extends NCAbstractTokensModel {
                             checkSparsity(sparsity, toks)
 
                             true
-                        case _ ⇒ false
+                        case _ ⇒
+                            false
                     }
-                })
+                }),
+                s"Variant with 3 `xyz` tokens should be exists."
             )
 
         ctx.getRequest.getNormalizedText match {

[incubator-nlpcraft] 03/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 8a273487ff2fa20eb5a97b7f7fb3d7a6e3930c63
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Apr 9 14:26:15 2021 +0300

    WIP.
---
 .../org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala    | 4 +---
 .../apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala  | 2 --
 .../nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala  | 7 +++----
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 04ed091..32d2dc5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -195,7 +195,6 @@ object NCDeployManager extends NCService with DecorateAsScala {
 
         val syns = mutable.HashSet.empty[SynonymHolder]
 
-        // TODO: Sparse for nonDSL
         def ok(b: Boolean, exp: Boolean): Boolean = if (exp) b else !b
         def filterDsl(syns: Set[SynonymHolder], dsl: Boolean): Set[SynonymHolder] =
             syns.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl))
@@ -504,9 +503,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
         else
             logger.warn(s"Model has no intent: $mdlId")
 
-        // TODO: sort!!!
         def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
-            set.groupBy(_.elmId).map(p ⇒ p._1 → p._2.map(_.syn).toSeq.sortBy(-_.size))
+            set.groupBy(_.elmId).map(p ⇒ p._1 → p._2.map(_.syn).toSeq.sorted.reverse)
 
         val notDsl = filterDsl(syns.toSet, dsl = false)
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index a1dbdac..4a1466e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -486,8 +486,6 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
                 // Loop has sense if model is complex (has user defined parsers or IDL based synonyms)
                 continue = NCModelEnricher.isComplex(mdl) && res.exists { case (_, same) ⇒ !same }
 
-                nlpSen.firstProbePhase = false
-
                 if (DEEP_DEBUG)
                     if (continue) {
                         val changed = res.filter(!_._2).keys.map(_.getClass.getSimpleName).mkString(", ")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index d668c02..1da1059 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -26,7 +26,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKin
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCProbeSynonym ⇒Synonym}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCProbeSynonym ⇒ Synonym}
 
 import java.io.Serializable
 import java.util
@@ -93,7 +93,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             else
                 wordIndexes.subsetOf(indexes)
 
-
         override def equals(obj: Any): Boolean = obj match {
             case x: Complex ⇒
                 hash == x.hash && (isToken && x.isToken && token == x.token || isWord && x.isWord && word == x.word)
@@ -618,7 +617,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                         def tryScan(syns: Seq[Synonym]): Unit =
                                             for (s ← syns if !found)
                                                 if (s.isMatch(toks))
-                                                    add("direct simple2", elm, cacheDirect, toks, tokIdxs, s)
+                                                    add("scan simple", elm, cacheDirect, toks, tokIdxs, s)
 
                                         tryMap(
                                             h.txtDirectSynonyms,
@@ -650,7 +649,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                         for (s ← get(mdl.synonymsDsl, elemId); comb ← dslCombs if !found)
                                             s.trySparseMatch(comb.map(_.data), req) match {
                                                 case Some(res) ⇒
-                                                    add("DSL", elm, cacheSparse, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                                    add("sparse DSL", elm, cacheSparse, toTokens(res, ns), tokIdxs, s, toParts(res, s))
                                                     dslCache += comb
                                                 case None ⇒ // No-op.
                                             }