Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/08 19:48:35 UTC

[incubator-nlpcraft] branch NLPCRAFT-287 updated (91a5253 -> bbeecf6)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 91a5253  WIP.
     new 86bccc9  WIP.
     new 968fabd  WIP.
     new a108490  WIP.
     new 70b9c9b  WIP.
     new 1f47a98  WIP.
     new c8e4898  WIP.
     new bbeecf6  WIP.

The 7 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala |   6 +-
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |  15 +-
 .../probe/mgrs/deploy/NCDeployManager.scala        |  16 +-
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  26 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 +
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 666 ++++++++++++---------
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   |  21 +-
 .../probe/mgrs/sentence/NCSentenceManager.scala    |   7 +-
 .../nlpcraft/examples/sql/NCSqlModelSpec.scala     |   7 -
 .../model/NCEnricherNestedModelSpec.scala          |   6 +-
 .../model/NCEnricherNestedModelSpec2.scala         |   5 +-
 11 files changed, 427 insertions(+), 350 deletions(-)

[incubator-nlpcraft] 01/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 86bccc9c31dcb24cbcfff1783e81cc868fff8803
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Wed Apr 7 12:25:41 2021 +0300

    WIP.
---
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala    | 21 ++++++++++-----------
 .../nlpcraft/examples/sql/NCSqlModelSpec.scala      |  7 -------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 95c123e..4d0b270 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -188,7 +188,7 @@ object NCSortEnricher extends NCProbeEnricher {
       *
       * @param toksNoteData
       */
-    private def split(toks: Seq[NCNlpSentenceToken], toksNoteData: Seq[NoteData], nullable: Boolean): Seq[Seq[NoteData]] = {
+    private def split(toks: Seq[NCNlpSentenceToken], othersRefs: Seq[NCNlpSentenceToken], toksNoteData: Seq[NoteData], nullable: Boolean): Seq[Seq[NoteData]] = {
         val res =
             if (toksNoteData.nonEmpty) {
                 val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
@@ -205,8 +205,10 @@ object NCSortEnricher extends NCProbeEnricher {
                     between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == stemAnd)
                 }
 
-                val minIdx = toks.dropWhile(t ⇒ !isUserNotValue(t)).head.index
-                val maxIdx = toks.reverse.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+                val toks2 = toks.filter(othersRefs.contains)
+
+                val minIdx = toks2.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+                val maxIdx = toks2.reverse.dropWhile(t ⇒ !isUserNotValue(t)).head.index
 
                 require(minIdx <= maxIdx)
 
@@ -311,12 +313,9 @@ object NCSortEnricher extends NCProbeEnricher {
             val others = toks.filter(t ⇒ !all.contains(t))
 
             if (others.nonEmpty) {
-                val i1 = others.head.index
-                val i2 = others.last.index
+                val idxs = others.map(_.index).toSet
 
-                val othersRefs = others.filter(
-                    t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2)
-                )
+                val othersRefs = others.filter(t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.toSet.subsetOf(idxs)))
 
                 if (
                     othersRefs.nonEmpty &&
@@ -348,12 +347,12 @@ object NCSortEnricher extends NCProbeEnricher {
                             if (data1.nonEmpty || data2.nonEmpty) {
                                 val seq1 =
                                     if (data1.nonEmpty)
-                                        split(part1, data1, nullable = false)
+                                        split(part1, othersRefs, data1, nullable = false)
                                     else
-                                        split(part2, data2, nullable = false)
+                                        split(part2, othersRefs, data2, nullable = false)
                                 val seq2 =
                                     if (data1.nonEmpty && data2.nonEmpty)
-                                        split(part2, data2, nullable = true)
+                                        split(part2, othersRefs, data2, nullable = true)
                                     else
                                         Seq.empty
                                 val asc = orderOpt.flatMap(o ⇒ Some(order(o.synonymIndex)._2))
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index 3483bd4..55dd983 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -107,13 +107,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
                 usr(text = "ship date", id = "col:date"),
             ),
             _ ⇒ checkExists(
-                txt = "give me the orders sorted by ship date",
-                nlp(text = "give me the", isStop = true),
-                usr(text = "orders", id = "tbl:orders"),
-                srt(text = "sorted by", typ = BY_ONLY, note = "col:date", index = 3),
-                usr(text = "ship date", id = "col:date"),
-            ),
-            _ ⇒ checkExists(
                 txt = "give me the orders sorted by ship date asc",
                 nlp(text = "give me the", isStop = true),
                 usr(text = "orders", id = "tbl:orders"),

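Note on this commit: a note now qualifies as an "others" reference only if it covers token indexes that the "other" tokens actually occupy (a subset test), not merely indexes that fall between the first and last of them (a range test). A minimal standalone sketch of the difference, with hypothetical data that is not part of the commit:

    object SubsetVsRange {
        def main(args: Array[String]): Unit = {
            // "Other" tokens occupy indexes 1, 3 and 4 -- note the gap at 2.
            val otherIdxs = Seq(1, 3, 4)
            // A candidate note covering indexes 1..3, including the gap.
            val noteIdxs = Seq(1, 2, 3)

            // Old check: inside the [min, max] bounds -- accepts the note.
            val byRange = noteIdxs.head >= otherIdxs.min && noteIdxs.last <= otherIdxs.max
            // New check: subset of the occupied indexes -- rejects the note.
            val bySubset = noteIdxs.toSet.subsetOf(otherIdxs.toSet)

            println(s"byRange=$byRange, bySubset=$bySubset") // byRange=true, bySubset=false
        }
    }
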
[incubator-nlpcraft] 03/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit a1084902c83f96f7c55868cc2f9d82f56d78322b
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Wed Apr 7 20:58:16 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala    | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 7d011a0..2776677 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -23,11 +23,10 @@ import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
 import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
 import org.apache.nlpcraft.common.{NCE, NCService, U}
 import org.apache.nlpcraft.model.NCModel
-import org.apache.nlpcraft.model.impl.NCTokenLogger
 
-import java.io.{Serializable => JSerializable}
+import java.io.{Serializable ⇒ JSerializable}
 import java.util
-import java.util.{List => JList}
+import java.util.{List ⇒ JList}
 import scala.collection.JavaConverters.{asScalaBufferConverter, _}
 import scala.collection.{Map, Seq, mutable}
 import scala.language.implicitConversions
@@ -679,7 +678,7 @@ object NCSentenceManager extends NCService {
 
         var sens =
             if (delCombs.nonEmpty) {
-                val toksByIdx: Seq[Set[NCNlpSentenceNote]] =
+                val toksByIdx =
                     delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
                         groupBy { case (idx, _) ⇒ idx }.
                         map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.

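Note on this commit: the hunk above only drops a now-redundant type ascription and an unused import; the grouping itself is untouched. For reference, a self-contained sketch of that fan-out/group-by pattern, using hypothetical string notes in place of NCNlpSentenceNote:

    object NotesByIndex {
        def main(args: Array[String]): Unit = {
            // Each note spans some word indexes.
            val notes = Seq("n1" → Seq(0, 1), "n2" → Seq(1, 2), "n3" → Seq(3))

            // Fan out to (index, note) pairs, group by index and keep the set
            // of notes competing for each index.
            val toksByIdx: Seq[Set[String]] =
                notes.flatMap { case (note, idxs) ⇒ idxs.map(_ → note) }.
                    groupBy { case (idx, _) ⇒ idx }.
                    map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                    toSeq

            // Index 1 is claimed by both n1 and n2.
            toksByIdx.foreach(println)
        }
    }
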
[incubator-nlpcraft] 05/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 1f47a982c25c0b82802b8881277bb51e1a6f3442
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Apr 8 18:57:21 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   9 +-
 .../probe/mgrs/deploy/NCDeployManager.scala        |  15 +-
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  24 ++-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 161 ++++++++++-----------
 4 files changed, 103 insertions(+), 106 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 0e418b3..31fa627 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -30,8 +30,7 @@ import scala.collection.{Map, Seq}
   * @param intents
   * @param directSynonyms
   * @param sparseSynonyms
-  * @param directSynonymsDsl
-  * @param addStopWordsStems
+  * @param synonymsDsl
   * @param exclStopWordsStems
   * @param suspWordsStems
   * @param elements
@@ -42,13 +41,13 @@ case class NCProbeModel(
     intents: Seq[NCIdlIntent],
     directSynonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
     sparseSynonyms: Map[String /*Element ID*/, Seq[NCProbeSynonym]],
-    directSynonymsDsl: Map[String /*Element ID*/ , Seq[NCProbeSynonym]], // Fast access map.
-    sparseSynonymsDsl: Map[String /*Element ID*/ , Seq[NCProbeSynonym]],
+    synonymsDsl: Map[String /*Element ID*/ , Seq[NCProbeSynonym]], // Fast access map.
     addStopWordsStems: Set[String],
     exclStopWordsStems: Set[String],
     suspWordsStems: Set[String],
     elements: Map[String /*Element ID*/ , NCElement],
     samples: Set[(String, Seq[Seq[String]])]
 ) {
-    def hasDslSynonyms(elemId: String): Boolean = directSynonymsDsl.contains(elemId) || sparseSynonymsDsl.contains(elemId)
+    def hasDslSynonyms(elemId: String): Boolean = synonymsDsl.contains(elemId)
+    def hasDslSynonyms: Boolean = synonymsDsl.nonEmpty
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index aa3b99e..04ed091 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -197,8 +197,10 @@ object NCDeployManager extends NCService with DecorateAsScala {
 
         // TODO: Sparse for nonDSL
         def ok(b: Boolean, exp: Boolean): Boolean = if (exp) b else !b
-        def filter(dsl: Boolean, sparse: Boolean): Set[SynonymHolder] =
-            syns.toSet.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl) && ok(s.sparse && s.syn.size > 1, sparse))
+        def filterDsl(syns: Set[SynonymHolder], dsl: Boolean): Set[SynonymHolder] =
+            syns.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl))
+        def filterSparse(syns: Set[SynonymHolder], sparse: Boolean): Set[SynonymHolder] =
+            syns.filter(s ⇒ ok(s.sparse && s.syn.size > 1, sparse))
 
         var cnt = 0
         val maxCnt = mdl.getMaxTotalSynonyms
@@ -506,14 +508,15 @@ object NCDeployManager extends NCService with DecorateAsScala {
         def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
             set.groupBy(_.elmId).map(p ⇒ p._1 → p._2.map(_.syn).toSeq.sortBy(-_.size))
 
+        val notDsl = filterDsl(syns.toSet, dsl = false)
+
         NCProbeModel(
             model = mdl,
             solver = solver,
             intents = intents.map(_._1).toSeq,
-            directSynonyms = mkFastAccessMap(filter(dsl = false, sparse = false), NCProbeSynonymsWrapper(_)),
-            sparseSynonyms = toMap(filter(dsl = false, sparse = true)),
-            directSynonymsDsl = toMap(filter(dsl = true, sparse = false)),
-            sparseSynonymsDsl = toMap(filter(dsl = true, sparse = true)),
+            directSynonyms = mkFastAccessMap(filterSparse(notDsl, sparse = false), NCProbeSynonymsWrapper(_)),
+            sparseSynonyms = toMap(filterSparse(notDsl, sparse = true)),
+            synonymsDsl = toMap(filterDsl(syns.toSet, dsl = true)),
             addStopWordsStems = addStopWords,
             exclStopWordsStems = exclStopWords,
             suspWordsStems = suspWords,
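
Note on the NCDeployManager change above: splitting the single filter(dsl, sparse) predicate into filterDsl and filterSparse is what allows the DSL synonyms to collapse into one synonymsDsl map while non-DSL synonyms still fan out by sparsity. A compact sketch of the composition with a hypothetical holder type:

    object FilterComposition {
        final case class Holder(dsl: Boolean, sparse: Boolean)

        def filterDsl(hs: Set[Holder], dsl: Boolean): Set[Holder] = hs.filter(_.dsl == dsl)
        def filterSparse(hs: Set[Holder], sparse: Boolean): Set[Holder] = hs.filter(_.sparse == sparse)

        def main(args: Array[String]): Unit = {
            val all = Set(
                Holder(dsl = false, sparse = false), Holder(dsl = false, sparse = true),
                Holder(dsl = true, sparse = false), Holder(dsl = true, sparse = true))

            val notDsl = filterDsl(all, dsl = false)

            println(filterSparse(notDsl, sparse = false)) // direct synonyms
            println(filterSparse(notDsl, sparse = true))  // sparse synonyms
            println(filterDsl(all, dsl = true))           // all DSL synonyms, both kinds
        }
    }
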
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 457bf35..ff0cb78 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -58,26 +58,24 @@ object NCModelManager extends NCService with DecorateAsScala {
             data.values.foreach(w ⇒ {
                 val mdl = w.model
 
-                val synCnt = w.directSynonyms.flatMap(_._2.map(_._2.count)).sum
-                val synDslCnt = w.directSynonymsDsl.map(_._2.size).sum
+                val synDirectCnt = w.directSynonyms.flatMap(_._2.map(_._2.count)).sum
                 val synSparseCnt = w.sparseSynonyms.map(_._2.size).sum
-                val synSparseDslCnt = w.sparseSynonymsDsl.map(_._2.size).sum
+                val synDslCnt = w.synonymsDsl.map(_._2.size).sum
                 val elmCnt = w.elements.keySet.size
                 val intentCnt = w.intents.size
 
                 def withWarn(i: Int): String = if (i == 0) s"0 ${r("(!)")}" else i.toString
 
                 tbl += Seq(
-                    s"Name:                  ${bo(c(mdl.getName))}",
-                    s"ID:                    ${bo(mdl.getId)}",
-                    s"Version:               ${mdl.getVersion}",
-                    s"Origin:                ${mdl.getOrigin}",
-                    s"Elements:              ${withWarn(elmCnt)}",
-                    s"Synonyms(Direct)       $synCnt",
-                    s"Synonyms(Direct, DSL): $synDslCnt",
-                    s"Synonyms(Sparse):      $synSparseCnt",
-                    s"Synonyms(Sparse, DSL): $synSparseDslCnt",
-                    s"Intents:               ${withWarn(intentCnt)}"
+                    s"Name:             ${bo(c(mdl.getName))}",
+                    s"ID:               ${bo(mdl.getId)}",
+                    s"Version:          ${mdl.getVersion}",
+                    s"Origin:           ${mdl.getOrigin}",
+                    s"Elements:         ${withWarn(elmCnt)}",
+                    s"Synonyms(Direct)  $synDirectCnt",
+                    s"Synonyms(Sparse): $synSparseCnt",
+                    s"Synonyms(DSL):    $synDslCnt",
+                    s"Intents:          ${withWarn(intentCnt)}"
                 )
             })
         }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 0542174..5169afe 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -373,30 +373,31 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
     /**
       *
-      * @param comb
-      * @param syn
+      * @param seq
+      * @param s
       */
-    private def getPartsComplex(comb: Seq[Complex], syn: Synonym): Seq[TokType] =
-        comb.zip(syn.map(_.kind)).flatMap {
+    private def toPartsComplex(seq: Seq[Complex], s: Synonym): Seq[TokType] =
+        seq.zip(s.map(_.kind)).flatMap {
             case (complex, kind) ⇒ if (complex.isToken) Some(complex.token → kind)
             else None
         }
 
     /**
       *
-      * @param comb
-      * @param syn
+      * @param seq
+      * @param s
       */
-    private def toParts(comb: Seq[NCDslContent], syn: Synonym): Seq[TokType] =
-        comb.zip(syn.map(_.kind)).flatMap {
+    private def toParts(seq: Seq[NCDslContent], s: Synonym): Seq[TokType] =
+        seq.zip(s.map(_.kind)).flatMap {
             case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get → kind) else None
         }
 
     /**
       *
       */
-    private def mkCache(): Cache =
-        mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]].withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[Int]])
+    private def mkCache(mdl: NCProbeModel): Cache =
+        mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]].empty ++
+            mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
 
     /**
       *
@@ -478,24 +479,21 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param h
       * @param toks
       */
-    private def mkComplexCombinations(h: ComplexHolder, toks: Seq[NlpToken]): Seq[Seq[Complex]] = {
-        val idxsSeq = toks.flatMap(_.wordIndexes)
-//        val idxsSorted = idxsSeq.sorted
-        val idxs = idxsSeq.toSet
-//        val idxMin = idxsSorted.head
-//        val idxMax = idxsSorted.last
+    private def mkComplexCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+        val idxs = toks.flatMap(_.wordIndexes).toSet
 
         h.complexes.par.
             flatMap(complexSeq ⇒ {
                 //val rec = complexSeq.tokensComplexes.filter(_.isSubsetOf(idxMin, idxMax, idxs))
-                val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxsSeq.contains))
+                val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxs.contains))
 
                 // Drops without tokens (IDL part works with tokens).
-                if (rec.nonEmpty)
-                    Some(
-                        rec ++
+                if (rec.nonEmpty) {
+                    val data = rec ++
                         (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.complexesWords)
-                    )
+
+                    if (!cache.contains(data)) Some(data) else None
+                }
                 else
                     None
             }).seq
@@ -569,31 +567,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
         startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒
             val req = NCRequestImpl(senMeta, srvReqId)
-            val matches = mutable.ArrayBuffer.empty[ElementMatch]
-            val cacheSparse = mkCache()
-            val cacheDirect = mkCache()
             val h = mkComplexes(mdl, ns)
 
-            var found = false
-
-            def add(typ: String, elm: NCElement, cache: Cache, res: Seq[NlpToken], tokIdxs: Seq[Int], s: Synonym, parts: Seq[TokType]): Unit = {
-                val toksSet = res.toSet
-
-                var added = false
-
-                // TODO:
-                if (!matches.exists(m ⇒ m.element.getId == elm.getId && toksSet.subsetOf(m.tokensSet))) {
-                    matches += ElementMatch(elm, res, s, parts)
-
-                    added = true
-                }
-
-                cache(elm.getId) += tokIdxs
-                found = true
-
-                println(s"ADDED: ${elm.getId}, type=$typ, res=${res.map(_.origText).mkString("|")}, tokIdxs=${tokIdxs.mkString("|")}, added=$added")
-            }
-
             startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
                 var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
                 ns.firstProbePhase = false
@@ -603,9 +578,36 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     println
                     println(s"GO $state")
 
+                    val matches = mutable.ArrayBuffer.empty[ElementMatch]
+
+                    val cacheSparse = mkCache(mdl)
+                    val cacheDirect = mkCache(mdl)
+                    val dslCache = mutable.HashSet.empty[Seq[Complex]]
+
+                    var found = false
+
+                    def add(typ: String, elm: NCElement, cache: Cache, res: Seq[NlpToken], tokIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = {
+                        var added = false
+
+                        if (!matchExist(elm.getId, res)) {
+                            matches += ElementMatch(elm, res, s, parts)
+
+                            added = true
+                        }
+
+                        cache(elm.getId) += tokIdxs
+                        found = true
+
+                        println(s"ADDED: ${elm.getId}, type=$typ, res=${res.map(_.origText).mkString("|")}, toks=${tokIdxs.mkString("|")}, added=$added")
+                    }
+
+                    // TODO:
+                    def matchExist(elemId: String, toks: Seq[NlpToken]): Boolean =
+                        matches.exists(m ⇒ m.element.getId == elemId && toks.toSet.subsetOf(m.tokensSet))
+
                     for (toks ← combosToks) {
                         val tokIdxs = toks.map(_.index)
-                        lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks)
+                        lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet)
                         lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                         // Attempt to match each element.
@@ -613,12 +615,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                         for (
                             elm ← mdl.elements.values;
                             elemId = elm.getId;
-                            if
-                                !alreadyMarked(toks, elm.getId)
+                            dirProc = cacheDirect(elemId).exists(_.containsSlice(tokIdxs));
+                            sparseProc = cacheSparse(elemId).exists(_.containsSlice(tokIdxs))
+                            if (!dirProc || !sparseProc) && !alreadyMarked(toks, elemId) && !matchExist(elemId, toks)
                         ) {
-                            val directProc = cacheDirect(elemId).exists(_.containsSlice(tokIdxs))
-                            val sparseProc = cacheSparse(elemId).exists(_.containsSlice(tokIdxs))
-
+                            //println(s"State=$elemId, dirProc=$dirProc, sparseProc=$sparseProc, cacheSparse(elemId)="+cacheSparse(elemId).mkString("|"))
                             // 1. SIMPLE.
                             found = false
 
@@ -630,19 +631,19 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 }
 
                             // 1.1 Direct.
-                            if (simpleEnabled && !directProc && !found)
+                            if (simpleEnabled && !dirProc && !found)
                                 fastAccess(mdl.directSynonyms, elemId, toks.length) match {
                                     case Some(h) ⇒
                                         def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
                                             syns.get(tokStems) match {
-                                                case Some(s) ⇒ add("direct simple", elm, cacheDirect, toks, tokIdxs, s, Seq.empty)
+                                                case Some(s) ⇒ add("direct simple", elm, cacheDirect, toks, tokIdxs, s)
                                                 case None ⇒ notFound()
                                             }
 
                                         def tryScan(syns: Seq[Synonym]): Unit =
                                             for (s ← syns if !found)
                                                 if (s.isMatch(toks))
-                                                    add("direct simple2", elm, cacheDirect, toks, tokIdxs, s, Seq.empty)
+                                                    add("direct simple2", elm, cacheDirect, toks, tokIdxs, s)
 
                                         tryMap(
                                             h.txtDirectSynonyms,
@@ -660,34 +661,37 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             if (simpleEnabled && !sparseProc && !found)
                                 for (s ← get(mdl.sparseSynonyms, elemId) if !found)
                                     s.trySparseMatch(toks) match {
-                                        case Some(res) ⇒ add("sparse simple", elm, cacheSparse, res, tokIdxs, s, Seq.empty)
+                                        case Some(res) ⇒ add("sparse simple", elm, cacheSparse, res, tokIdxs, s)
                                         case None ⇒ // No-op.
                                     }
 
                             // 2. DSL.
-                            found = false
                             val dslEnabled = state != SIMPLE
 
-                            // 2.1 Direct.
-                            if (dslEnabled && mdl.directSynonymsDsl.nonEmpty && !directProc && !found)
-                                for (s ← get(mdl.directSynonymsDsl, elemId); comb ← dslCombs if !found) {
-                                    if (s.isMatch(comb.map(_.data), req)) {
-                                        println(s"OK $elemId for s=$s for toks:${toks.map(_.origText)}")
-
-                                        add("direct DSL", elm, cacheDirect, toks, tokIdxs, s, getPartsComplex(comb, s))
-                                    }
-                                    println {
-                                        println(s"NOT OK $elemId for s=$s for toks:${toks.map(_.origText)}")
-                                    }
+                            if (dslEnabled && mdl.synonymsDsl.nonEmpty) {
+                                found = false
+
+                                // 2.1 Sparse.
+                                if (mdl.hasDslSynonyms) {
+                                    if (!sparseProc)
+                                        for (s ← get(mdl.synonymsDsl, elemId); comb ← dslCombs if !found)
+                                            s.trySparseMatch(comb.map(_.data), req) match {
+                                                case Some(res) ⇒
+                                                    add("DSL", elm, cacheSparse, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                                    dslCache += comb
+                                                case None ⇒ // No-op.
+                                            }
                                 }
-
-                            // 2.2 Sparse.
-                            if (dslEnabled && mdl.sparseSynonymsDsl.nonEmpty && !sparseProc && !found)
-                                for (s ← get(mdl.sparseSynonymsDsl, elemId); comb ← dslCombs if !found)
-                                    s.trySparseMatch(comb.map(_.data), req) match {
-                                        case Some(res) ⇒ add("sparse DSL", elm, cacheSparse, toTokens(res, ns), tokIdxs, s, toParts(res, s))
-                                        case None ⇒ // No-op.
-                                    }
+                                // 2.2 Direct.
+                                else {
+                                    if (!dirProc)
+                                        for (s ← get(mdl.synonymsDsl, elemId); comb ← dslCombs if !found)
+                                            if (s.isMatch(comb.map(_.data), req)) {
+                                                add("direct DSL", elm, cacheDirect, toks, tokIdxs, s, toPartsComplex(comb, s))
+                                                dslCache += comb
+                                            }
+                                }
+                            }
                         }
                     }
 
@@ -701,18 +705,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                     go()
                 }
-
-
             }
 
-
-
             processParsers(mdl, ns, span, req)
         }
     }
 
-    def isComplex(mdl: NCProbeModel): Boolean =
-        mdl.directSynonymsDsl.nonEmpty ||
-        mdl.sparseSynonymsDsl.nonEmpty ||
-        !mdl.model.getParsers.isEmpty
+    def isComplex(mdl: NCProbeModel): Boolean = mdl.synonymsDsl.nonEmpty || !mdl.model.getParsers.isEmpty
 }
\ No newline at end of file

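Two details in this commit are easy to miss. First, mkCache no longer relies on withDefault: a map default is materialized on every miss but never stored, so an append through the default silently updates a buffer the map does not keep; pre-populating one buffer per element ID avoids that. Second, the new dslCache set lets mkComplexCombinations skip combinations that already produced a DSL match. A minimal demonstration of the first point, with a hypothetical element ID:

    import scala.collection.mutable

    object CachePitfall {
        def main(args: Array[String]): Unit = {
            // withDefault builds a fresh buffer on each miss but never stores
            // it, so the append below is silently lost.
            val cacheOld = mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]].
                withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[Int]])

            cacheOld("elem") += Seq(0, 1)
            println(cacheOld.contains("elem")) // false

            // Pre-populated shape, as in mkCache(mdl): one stored buffer per element ID.
            val cacheNew = mutable.HashMap(Seq("elem").map(_ → mutable.ArrayBuffer.empty[Seq[Int]]): _*)

            cacheNew("elem") += Seq(0, 1)
            println(cacheNew("elem")) // ArrayBuffer(List(0, 1))
        }
    }
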
[incubator-nlpcraft] 02/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 968fabdc4fa4f73ffa358f6e6ac301446dfa9982
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Wed Apr 7 13:13:09 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 604adde..f74346b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.common._
 import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, _}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
@@ -546,7 +546,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 val tokIdxs = m.tokens.map(_.index)
                 val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
 
-                mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
+                // TODO:
+                if (!alreadyMarked(m.tokens, elm.getId))
+                    mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
             }
 
             val parsers = mdl.model.getParsers

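Note on this commit: the guard added here makes repeated enrichment passes idempotent, since a token span is marked with an element ID only if it does not already carry it. A tiny sketch of the idea with a hypothetical token type (the real alreadyMarked inspects sentence notes):

    object MarkGuard {
        final case class Tok(index: Int, var ids: Set[String] = Set.empty)

        def alreadyMarked(toks: Seq[Tok], elemId: String): Boolean =
            toks.forall(_.ids.contains(elemId))

        def mark(toks: Seq[Tok], elemId: String): Unit =
            toks.foreach(t ⇒ t.ids += elemId)

        def main(args: Array[String]): Unit = {
            val toks = Seq(Tok(0), Tok(1))

            for (_ ← 1 to 2) // the second pass becomes a no-op
                if (!alreadyMarked(toks, "tbl:orders"))
                    mark(toks, "tbl:orders")

            println(toks.map(_.ids)) // List(Set(tbl:orders), Set(tbl:orders))
        }
    }
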
[incubator-nlpcraft] 04/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 70b9c9b60dd148a40828e6becef7003fbd5441e0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Apr 8 16:10:43 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala |   6 +-
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |  12 +-
 .../probe/mgrs/deploy/NCDeployManager.scala        |   7 +-
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  26 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 +
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 629 ++++++++++++---------
 .../model/NCEnricherNestedModelSpec.scala          |  50 +-
 .../model/NCEnricherNestedModelSpec2.scala         |   5 +-
 8 files changed, 414 insertions(+), 323 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 91ca5a9..f2965cb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -48,7 +48,8 @@ class NCNlpSentence(
     override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
     private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
     private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
-    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
+    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty,
+    var firstProbePhase: Boolean = true
 ) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
     @transient
     private var hash: java.lang.Integer = _
@@ -65,7 +66,8 @@ class NCNlpSentence(
             tokens = tokens.map(_.clone()),
             deletedNotes = deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
             initNlpNotes = initNlpNotes,
-            nlpTokens = nlpTokens
+            nlpTokens = nlpTokens,
+            firstProbePhase = firstProbePhase
         )
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index b99ddd0..0e418b3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -28,9 +28,9 @@ import scala.collection.{Map, Seq}
   * @param model
   * @param solver
   * @param intents
-  * @param nonSparseSynonyms
+  * @param directSynonyms
   * @param sparseSynonyms
-  * @param nonSparseSynonymsDsl
+  * @param directSynonymsDsl
   * @param addStopWordsStems
   * @param exclStopWordsStems
   * @param suspWordsStems
@@ -40,13 +40,15 @@ case class NCProbeModel(
     model: NCModel,
     solver: NCIntentSolver,
     intents: Seq[NCIdlIntent],
-    nonSparseSynonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
+    directSynonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
     sparseSynonyms: Map[String /*Element ID*/, Seq[NCProbeSynonym]],
-    nonSparseSynonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
+    directSynonymsDsl: Map[String /*Element ID*/ , Seq[NCProbeSynonym]], // Fast access map.
     sparseSynonymsDsl: Map[String /*Element ID*/ , Seq[NCProbeSynonym]],
     addStopWordsStems: Set[String],
     exclStopWordsStems: Set[String],
     suspWordsStems: Set[String],
     elements: Map[String /*Element ID*/ , NCElement],
     samples: Set[(String, Seq[Seq[String]])]
-)
+) {
+    def hasDslSynonyms(elemId: String): Boolean = directSynonymsDsl.contains(elemId) || sparseSynonymsDsl.contains(elemId)
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 06fe040..aa3b99e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -198,7 +198,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
         // TODO: Sparse for nonDSL
         def ok(b: Boolean, exp: Boolean): Boolean = if (exp) b else !b
         def filter(dsl: Boolean, sparse: Boolean): Set[SynonymHolder] =
-            syns.toSet.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl) && ok(s.sparse, sparse))
+            syns.toSet.filter(s ⇒ ok(s.syn.exists(_.kind == IDL), dsl) && ok(s.sparse && s.syn.size > 1, sparse))
 
         var cnt = 0
         val maxCnt = mdl.getMaxTotalSynonyms
@@ -502,6 +502,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
         else
             logger.warn(s"Model has no intent: $mdlId")
 
+        // TODO: sort!!!
         def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
             set.groupBy(_.elmId).map(p ⇒ p._1 → p._2.map(_.syn).toSeq.sortBy(-_.size))
 
@@ -509,9 +510,9 @@ object NCDeployManager extends NCService with DecorateAsScala {
             model = mdl,
             solver = solver,
             intents = intents.map(_._1).toSeq,
-            nonSparseSynonyms = mkFastAccessMap(filter(dsl = false, sparse = false), NCProbeSynonymsWrapper(_)),
+            directSynonyms = mkFastAccessMap(filter(dsl = false, sparse = false), NCProbeSynonymsWrapper(_)),
             sparseSynonyms = toMap(filter(dsl = false, sparse = true)),
-            nonSparseSynonymsDsl = mkFastAccessMap(filter(dsl = true, sparse = false), _.sorted.reverse),
+            directSynonymsDsl = toMap(filter(dsl = true, sparse = false)),
             sparseSynonymsDsl = toMap(filter(dsl = true, sparse = true)),
             addStopWordsStems = addStopWords,
             exclStopWordsStems = exclStopWords,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 03c59ff..457bf35 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -58,26 +58,26 @@ object NCModelManager extends NCService with DecorateAsScala {
             data.values.foreach(w ⇒ {
                 val mdl = w.model
 
-                val synCnt = w.nonSparseSynonyms.flatMap(_._2.map(_._2.count)).sum
-                val synDslCnt = w.nonSparseSynonymsDsl.map(_._2.size).sum
+                val synCnt = w.directSynonyms.flatMap(_._2.map(_._2.count)).sum
+                val synDslCnt = w.directSynonymsDsl.map(_._2.size).sum
                 val synSparseCnt = w.sparseSynonyms.map(_._2.size).sum
                 val synSparseDslCnt = w.sparseSynonymsDsl.map(_._2.size).sum
                 val elmCnt = w.elements.keySet.size
                 val intentCnt = w.intents.size
 
-                def getWithWarning(i: Int): String = if (i == 0) s"0 ${r("(!)")}" else i.toString
+                def withWarn(i: Int): String = if (i == 0) s"0 ${r("(!)")}" else i.toString
 
                 tbl += Seq(
-                    s"Name:                      ${bo(c(mdl.getName))}",
-                    s"ID:                        ${bo(mdl.getId)}",
-                    s"Version:                   ${mdl.getVersion}",
-                    s"Origin:                    ${mdl.getOrigin}",
-                    s"Elements:                  ${getWithWarning(elmCnt)}",
-                    s"Synonyms(Continuous)       $synCnt",
-                    s"Synonyms(Continuous, DSL): $synDslCnt",
-                    s"Synonyms(Sparse):          $synSparseCnt",
-                    s"Synonyms(Sparse, DSL):     $synSparseDslCnt",
-                    s"Intents:                   ${getWithWarning(intentCnt)}"
+                    s"Name:                  ${bo(c(mdl.getName))}",
+                    s"ID:                    ${bo(mdl.getId)}",
+                    s"Version:               ${mdl.getVersion}",
+                    s"Origin:                ${mdl.getOrigin}",
+                    s"Elements:              ${withWarn(elmCnt)}",
+                    s"Synonyms(Direct)       $synCnt",
+                    s"Synonyms(Direct, DSL): $synDslCnt",
+                    s"Synonyms(Sparse):      $synSparseCnt",
+                    s"Synonyms(Sparse, DSL): $synSparseDslCnt",
+                    s"Intents:               ${withWarn(intentCnt)}"
                 )
             })
         }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 4a1466e..a1dbdac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -486,6 +486,8 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
                 // Loop has sense if model is complex (has user defined parsers or IDL based synonyms)
                 continue = NCModelEnricher.isComplex(mdl) && res.exists { case (_, same) ⇒ !same }
 
+                nlpSen.firstProbePhase = false
+
                 if (DEEP_DEBUG)
                     if (continue) {
                         val changed = res.filter(!_._2).keys.map(_.getClass.getSimpleName).mkString(", ")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index f74346b..0542174 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,14 +19,14 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, _}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym ⇒ Synonym, NCProbeVariants}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCProbeSynonym ⇒ Synonym}
 
 import java.io.Serializable
 import java.util
@@ -91,6 +91,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             else
                 wordIndexes.subsetOf(indexes)
 
+
         override def equals(obj: Any): Boolean = obj match {
             case x: Complex ⇒
                 hash == x.hash && (isToken && x.isToken && token == x.token || isWord && x.isWord && word == x.word)
@@ -101,7 +102,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         override def toString: String = {
             val idxs = wordIndexes.mkString(",")
 
-            if (isToken) s"'$origText' (${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
+            if (isToken && token.getId != "nlpcraft:nlp") s"'$origText' (${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
         }
     }
 
@@ -125,6 +126,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         override def toString: String = tokensComplexes.mkString(" | ")
     }
 
+    case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq])
+
     // Found-by-synonym model element.
     case class ElementMatch(
         element: NCElement,
@@ -164,6 +167,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         override def toString: String = s"Element=${element.getId}, indexes=${tokens.map(_.index).mkString(",")}, synonym=$synonym"
     }
 
+    object State extends Enumeration {
+        type State = Value
+
+        val SIMPLE, DSL_FIRST, DSL_NEXT = Value
+    }
+
+    import State._
+
     /**
       *
       * @param parent Optional parent span.
@@ -246,6 +257,92 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     }
 
     /**
+      *
+      * @param mdl
+      * @param ns
+      * @param span
+      * @param req
+      */
+    private def processParsers(mdl: NCProbeModel, ns: NCNlpSentence, span: Span, req: NCRequestImpl): Unit = {
+        val parsers = mdl.model.getParsers
+
+        for (parser ← parsers.asScala) {
+            parser.onInit()
+
+            startScopedSpan("customParser", span,
+                "srvReqId" → ns.srvReqId,
+                "mdlId" → mdl.model.getId,
+                "txt" → ns.text) { _ ⇒
+                def to(t: NlpToken): NCCustomWord =
+                    new NCCustomWord {
+                        override def getNormalizedText: String = t.normText
+                        override def getOriginalText: String = t.origText
+                        override def getStartCharIndex: Int = t.startCharIndex
+                        override def getEndCharIndex: Int = t.endCharIndex
+                        override def getPos: String = t.pos
+                        override def getPosDescription: String = t.posDesc
+                        override def getLemma: String = t.lemma
+                        override def getStem: String = t.stem
+                        override def isStopWord: Boolean = t.isStopWord
+                        override def isBracketed: Boolean = t.isBracketed
+                        override def isQuoted: Boolean = t.isQuoted
+                        override def isKnownWord: Boolean = t.isKnownWord
+                        override def isSwearWord: Boolean = t.isSwearWord
+                        override def isEnglish: Boolean = t.isEnglish
+                    }
+
+                val res = parser.parse(
+                    req,
+                    mdl.model,
+                    ns.map(to).asJava,
+                    ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
+                        val noteId = n.noteType
+                        val words = ns.filter(t ⇒ n.tokenIndexes.contains(t.index)).map(to).asJava
+                        val md = n.asMetadata()
+
+                        new NCCustomElement() {
+                            override def getElementId: String = noteId
+                            override def getWords: util.List[NCCustomWord] = words
+                            override def getMetadata: JavaMeta = md.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
+                        }
+                    }).asJava
+                )
+
+                if (res != null)
+                    res.asScala.foreach(e ⇒ {
+                        val elemId = e.getElementId
+                        val words = e.getWords
+
+                        if (elemId == null)
+                            throw new NCE(s"Custom model parser cannot return 'null' element ID.")
+
+                        if (words == null || words.isEmpty)
+                            throw new NCE(s"Custom model parser cannot return empty custom tokens [elementId=$elemId]")
+
+                        val matchedToks = words.asScala.map(w ⇒
+                            ns.find(t ⇒
+                                t.startCharIndex == w.getStartCharIndex && t.endCharIndex == w.getEndCharIndex
+                            ).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w"))
+                        )
+
+                        if (!alreadyMarked(matchedToks, elemId))
+                            mark(
+                                ns,
+                                elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
+                                toks = matchedToks,
+                                direct = true,
+                                syn = None,
+                                metaOpt = Some(e.getMetadata.asScala),
+                                parts = Seq.empty
+                            )
+                    })
+            }
+
+            parser.onDiscard()
+        }
+    }
+
+    /**
       * Gets all sequential permutations of given tokens.
       *
       * For example, if buffer contains "a b c d" tokens, then this function will return the
@@ -306,7 +403,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param tows
       * @param ns
       */
-    private def toNlpTokens(tows: Seq[NCDslContent], ns: NCNlpSentence): Seq[NlpToken] =
+    private def toTokens(tows: Seq[NCDslContent], ns: NCNlpSentence): Seq[NlpToken] =
         (
             tows.filter(_.isRight).map(_.right.get) ++
                 tows.filter(_.isLeft).map(_.left.get).
@@ -320,6 +417,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     private def tokString(toks: Seq[NlpToken]): String = toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
 
     /**
+      *
+      * @param m
+      * @param id
+      * @return
+      */
+    private def get(m: Map[String , Seq[Synonym]], id: String): Seq[Synonym] = m.getOrElse(id, Seq.empty)
+
+    /**
       * Gets synonyms sorted in descending order by their weight (already prepared),
       * i.e. first synonym in the sequence is the most important one.
       *
@@ -330,308 +435,284 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     private def fastAccess[T](fastMap: Map[String, Map[Int, T]], elmId: String, len: Int): Option[T] =
         fastMap.getOrElse(elmId, Map.empty[Int, T]).get(len)
 
+    /**
+      *
+      * @param mdl
+      * @param ns
+      */
+    private def mkComplexes(mdl: NCProbeModel, ns: NCNlpSentence): ComplexHolder = {
+        val complexesWords = ns.map(Complex(_))
+
+        val complexes: Seq[ComplexSeq] =
+            NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
+                map(_.asScala).
+                par.
+                flatMap(sen ⇒
+                    // Tokens splitting.
+                    // For example sentence "A B С D E" (5 words) processed as 3 tokens on first phase after collapsing
+                    //  'A B' (2 words), 'C D' (2 words) and 'E' (1 word)
+                    //  So, result combinations will be:
+                    //  Token(AB) + Token(CD) + Token(E)
+                    //  Token(AB) + Word(C) + Word(D) + Token(E)
+                    //  Word(A) + Word(B) + Token(CD) + Token(E)
+                    //  Word(A) + Word(B) + Word(C) + Word(D) + Token(E)
+                    combos(sen).
+                        map(senPartComb ⇒ {
+                            sen.flatMap(t ⇒
+                                // Single word token is not split as words - token.
+                                // Partly (not strict in) token - word.
+                                if (t.wordIndexes.length == 1 || senPartComb.contains(t))
+                                    Seq(Complex(t))
+                                else
+                                    t.wordIndexes.map(complexesWords)
+                            )
+                            // Drops without tokens (IDL part works with tokens).
+                        }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
+                ).seq
+
+        ComplexHolder(complexesWords, complexes)
+    }
+
+    /**
+      *
+      * @param h
+      * @param toks
+      */
+    private def mkComplexCombinations(h: ComplexHolder, toks: Seq[NlpToken]): Seq[Seq[Complex]] = {
+        val idxsSeq = toks.flatMap(_.wordIndexes)
+//        val idxsSorted = idxsSeq.sorted
+        val idxs = idxsSeq.toSet
+//        val idxMin = idxsSorted.head
+//        val idxMax = idxsSorted.last
+
+        h.complexes.par.
+            flatMap(complexSeq ⇒ {
+                //val rec = complexSeq.tokensComplexes.filter(_.isSubsetOf(idxMin, idxMax, idxs))
+                val rec = complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxsSeq.contains))
+
+                // Drops without tokens (IDL part works with tokens).
+                if (rec.nonEmpty)
+                    Some(
+                        rec ++
+                        (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.complexesWords)
+                    )
+                else
+                    None
+            }).seq
+    }
+
+    /**
+      *
+      * @param ns
+      * @param mdlId
+      * @param matches
+      */
+    private def processMatches(ns: NCNlpSentence, mdlId: String, matches: Seq[ElementMatch]): Unit = {
+        // Scans by elements that are found with same tokens length.
+        // Inside, for each token we drop all non-optimized combinations.
+        // Example:
+        // 1. element's synonym - 'a b', isSparse 'true', isPermuteSynonyms 'true'
+        // 2. Request 'a b a b',
+        // Initially found 0-1, 1-2, 2-3, 0-3.
+        // 0-3 will be deleted because for 0 and 3 tokens best variants found for same element with same tokens length.
+        val matchesNorm =
+            matches.
+                flatMap(m ⇒ m.tokens.map(_ → m)).
+                groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
+                flatMap { case (_, seq) ⇒
+                    // Optimization by sparsity sum for each tokens set for one element found with same tokens count.
+                    U.permute(
+                        seq.groupBy { case (tok, _) ⇒ tok }.
+                            map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ m }.toList }.toList
+                    ).minBy(_.map(_.sparsity).sum)
+                }.
+                toSeq.
+                distinct
+
+        val matchCnt = matchesNorm.size
+
+        // TODO:matchesNorm
+        // Add notes for all remaining (non-intersecting) matches.
+        for ((m, idx) ← matches.zipWithIndex) {
+            if (DEEP_DEBUG)
+                logger.trace(
+                    s"Model '$mdlId' element found (${idx + 1} of $matchCnt) [" +
+                        s"elementId=${m.element.getId}, " +
+                        s"synonym=${m.synonym}, " +
+                        s"tokens=${tokString(m.tokens)}" +
+                        s"]"
+                )
+
+            val elm = m.element
+            val syn = m.synonym
+
+            val tokIdxs = m.tokens.map(_.index)
+            val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
+
+            // TODO:
+            if (!alreadyMarked(m.tokens, elm.getId)) {
+                mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
+
+                println(s"SET: ${elm.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
+            }
+            else
+                println(s"NOT SET: ${elm.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
+        }
+    }
+
     @throws[NCE]
     override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
         require(isStarted)
 
-        startScopedSpan("enrich", parent,
-            "srvReqId" → ns.srvReqId,
-            "mdlId" → mdl.model.getId,
-            "txt" → ns.text
-        ) { span ⇒
-            val req = NCRequestImpl(senMeta, ns.srvReqId)
-            val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
-            val firstPhase = !ns.exists(_.isUser)
+        val mdlId = mdl.model.getId
+        val srvReqId = ns.srvReqId
+
+        startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒
+            val req = NCRequestImpl(senMeta, srvReqId)
             val matches = mutable.ArrayBuffer.empty[ElementMatch]
             val cacheSparse = mkCache()
-            val cacheNotSparse = mkCache()
+            val cacheDirect = mkCache()
+            val h = mkComplexes(mdl, ns)
+
+            var found = false
+
+            def add(typ: String, elm: NCElement, cache: Cache, res: Seq[NlpToken], tokIdxs: Seq[Int], s: Synonym, parts: Seq[TokType]): Unit = {
+                val toksSet = res.toSet
 
-            def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: Synonym, parts: Seq[TokType]): Unit = {
-                val toksSet = toks.toSet
+                var added = false
 
                 // TODO:
-                //if (!matches.exists(m ⇒ m.element.getId == elm.getId && toksSet.subsetOf(m.tokensSet)))
-                    matches += ElementMatch(elm, toks, syn, parts)
-            }
+                if (!matches.exists(m ⇒ m.element.getId == elm.getId && toksSet.subsetOf(m.tokensSet))) {
+                    matches += ElementMatch(elm, res, s, parts)
 
-            lazy val complexesWords = ns.map(Complex(_))
-            lazy val complexes: Seq[ComplexSeq] =
-                NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
-                    map(_.asScala).
-                    par.
-                    flatMap(sen ⇒
-                        // Tokens splitting.
-                        // For example sentence "A B С D E" (5 words) processed as 3 tokens on first phase after collapsing
-                        //  'A B' (2 words), 'C D' (2 words) and 'E' (1 word)
-                        //  So, result combinations will be:
-                        //  Token(AB) + Token(CD) + Token(E)
-                        //  Token(AB) + Word(C) + Word(D) + Token(E)
-                        //  Word(A) + Word(B) + Token(CD) + Token(E)
-                        //  Word(A) + Word(B) + Word(C) + Word(D) + Token(E)
-                        combos(sen).
-                            map(senPartComb ⇒ {
-                                sen.flatMap(t ⇒
-                                    // Single word token is not split as words - token.
-                                    // Partly (not strict in) token - word.
-                                    if (t.wordIndexes.length == 1 || senPartComb.contains(t))
-                                        Seq(Complex(t))
-                                    else
-                                        t.wordIndexes.map(complexesWords)
-                                )
-                                // Drops without tokens (IDL part works with tokens).
-                            }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
-                    ).seq
-
-            startScopedSpan("synsProc", span,
-                "srvReqId" → ns.srvReqId,
-                "mdlId" → mdl.model.getId,
-                "txt" → ns.text
-            ) {
-                _ ⇒
-                for (toks ← combos(ns)) {
-                    val indexes = toks.map(_.index)
-
-                    lazy val dslCombs: Map[Int, Seq[Seq[Complex]]] = {
-                        val idxsSeq = toks.flatMap(tokIdxs)
-                        val idxsSorted = idxsSeq.sorted
-                        val idxs = idxsSeq.toSet
-                        val idxMin = idxsSorted.head
-                        val idxMax = idxsSorted.last
-
-                        lazy val sorted = idxsSorted.zipWithIndex.toMap
-
-                        complexes.par.
-                            flatMap(complexSeq ⇒ {
-                                val rec = complexSeq.tokensComplexes.filter(_.isSubsetOf(idxMin, idxMax, idxs))
-
-                                // Drops without tokens (IDL part works with tokens).
-                                if (rec.nonEmpty)
-                                    Some(
-                                        rec ++
-                                            (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).
-                                            map(complexesWords)
-                                    )
-                                else
-                                    None
-                            }).
-                            map(_.sortBy(p ⇒ sorted(p.wordIndexes.head))).seq.groupBy(_.length)
-                    }
+                    added = true
+                }
 
-                    lazy val tokStems = toks.map(_.stem).mkString(" ")
-
-                    // Attempt to match each element.
-                    for (
-                        elm ← mdl.elements.values;
-                        elemId = elm.getId
-                        if !alreadyMarked(toks, elm.getId);
-                        sparseEnabled = !cacheSparse(elemId).exists(_.containsSlice(indexes));
-                        notSparseEnabled = !cacheNotSparse(elemId).exists(_.containsSlice(indexes))
-                        if sparseEnabled || notSparseEnabled
-                    ) {
-                        var found = false
-
-                        def add(cache: Cache, res: Seq[NlpToken], s: Synonym, parts: Seq[TokType]): Unit = {
-                            addMatch(elm, res, s, parts)
-                            cache(elemId) += indexes
-                            found = true
-                        }
+                cache(elm.getId) += tokIdxs
+                found = true
 
-                        def addSparse(res: Seq[NlpToken], s: Synonym, parts: Seq[TokType]): Unit = add(cacheSparse, res, s, parts)
-                        def addNotSparse(s: Synonym, parts: Seq[TokType]): Unit = add(cacheNotSparse, toks,  s, parts)
-
-                        // 1. Simple, not sparse.
-                        if (firstPhase && notSparseEnabled && !found)
-                            fastAccess(mdl.nonSparseSynonyms, elemId, toks.length) match {
-                                case Some(h) ⇒
-                                    def tryMap(synsMap: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
-                                        synsMap.get(tokStems) match {
-                                            case Some(syn) ⇒ addNotSparse(syn, Seq.empty)
-                                            case None ⇒ notFound()
-                                        }
-
-                                    def tryScan(synsSeq: Seq[Synonym]): Unit =
-                                        for (syn ← synsSeq if !found)
-                                            if (syn.isMatch(toks))
-                                                addNotSparse(syn, Seq.empty)
-
-                                    tryMap(
-                                        h.txtDirectSynonyms,
-                                        () ⇒ {
-                                            tryScan(h.notTxtDirectSynonyms)
-
-                                            if (!found)
-                                                tryMap(
-                                                    h.txtNotDirectSynonyms,
-                                                    () ⇒ tryScan(h.notTxtNotDirectSynonyms)
-                                                )
-                                        }
-                                    )
-                                case None ⇒ // No-op.
-                            }
-
-                        // 2. DSL, non sparse.
-                        if (notSparseEnabled && mdl.nonSparseSynonymsDsl.nonEmpty && !found) {
-                            for (
-                                (len, seq) ← dslCombs;
-                                syn ← fastAccess(mdl.nonSparseSynonymsDsl, elemId, len).getOrElse(Seq.empty);
-                                comb ← seq if !found
-                            ) {
-                                if (syn.isMatch(comb.map(_.data), req))
-                                    addNotSparse(syn, getPartsComplex(comb, syn))
-                            }
-                        }
+                println(s"ADDED: ${elm.getId}, type=$typ, res=${res.map(_.origText).mkString("|")}, tokIdxs=${tokIdxs.mkString("|")}, added=$added")
+            }
 
-                        // 3. Simple, sparse.
-                        if (firstPhase && sparseEnabled && !found)
-                            for (syn ← mdl.sparseSynonyms.getOrElse(elemId, Seq.empty) if !found)
-                                syn.trySparseMatch(toks) match {
-                                    case Some(res) ⇒ addSparse(res, syn, Seq.empty)
-                                    case None ⇒ // No-op.
+            startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
+                var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
+                ns.firstProbePhase = false
+                val combosToks = combos(ns)
+
+                def go(): Unit = {
+                    println()
+                    println(s"GO $state")
+
+                    for (toks ← combosToks) {
+                        val tokIdxs = toks.map(_.index)
+                        lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks)
+                        lazy val tokStems = toks.map(_.stem).mkString(" ")
+
+                        // Attempt to match each element.
+                        // TODO: alreadyMarked - the same match may be found again but marked over fewer tokens (can such cases be skipped right away?)
+                        for (
+                            elm ← mdl.elements.values;
+                            elemId = elm.getId;
+                            if !alreadyMarked(toks, elemId)
+                        ) {
+                            val directProc = cacheDirect(elemId).exists(_.containsSlice(tokIdxs))
+                            val sparseProc = cacheSparse(elemId).exists(_.containsSlice(tokIdxs))
+
+                            // 1. SIMPLE.
+                            found = false
+
+                            val simpleEnabled: Boolean =
+                                state match {
+                                    case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId)
+                                    case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId)
+                                    case _ ⇒ false
                                 }
 
-                        // 4. DSL, sparse.
-                        if (sparseEnabled && mdl.sparseSynonymsDsl.nonEmpty && !found)
-                            for (
-                                syn ← mdl.sparseSynonymsDsl.getOrElse(elemId, Seq.empty);
-                                (_, seq) ← dslCombs;
-                                comb ← seq if !found
-                            ) {
-                                syn.trySparseMatch(comb.map(_.data), req) match {
-                                    case Some(towsRes) ⇒ addSparse(toNlpTokens(towsRes, ns), syn, toParts(towsRes, syn))
+                            // 1.1 Direct.
+                            if (simpleEnabled && !directProc && !found)
+                                fastAccess(mdl.directSynonyms, elemId, toks.length) match {
+                                    case Some(h) ⇒
+                                        def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
+                                            syns.get(tokStems) match {
+                                                case Some(s) ⇒ add("direct simple", elm, cacheDirect, toks, tokIdxs, s, Seq.empty)
+                                                case None ⇒ notFound()
+                                            }
+
+                                        def tryScan(syns: Seq[Synonym]): Unit =
+                                            for (s ← syns if !found)
+                                                if (s.isMatch(toks))
+                                                    add("direct simple2", elm, cacheDirect, toks, tokIdxs, s, Seq.empty)
+
+                                        tryMap(
+                                            h.txtDirectSynonyms,
+                                            () ⇒ {
+                                                tryScan(h.notTxtDirectSynonyms)
+
+                                                if (!found)
+                                                    tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
+                                            }
+                                        )
                                     case None ⇒ // No-op.
                                 }
-                            }
-                    }
-                }
-            }
 
-            // Scans by elements that are found with same tokens length.
-            // Inside, for each token we drop all non-optimized combinations.
-            // Example:
-            // 1. element's synonym - 'a b', isSparse 'true', isPermuteSynonyms 'true'
-            // 2. Request 'a b a b',
-            // Initially found 0-1, 1-2, 2-3, 0-3.
-            // 0-3 will be deleted because for 0 and 3 tokens best variants found for same element with same tokens length.
-            val matchesNorm =
-                matches.
-                    flatMap(m ⇒ m.tokens.map(_ → m)).
-                    groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
-                    flatMap { case (_, seq) ⇒
-                        // Optimization by sparsity sum for each tokens set for one element found with same tokens count.
-                        U.permute(
-                            seq.groupBy { case (tok, _) ⇒ tok }.
-                                map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ m }.toList }.toList
-                        ).minBy(_.map(_.sparsity).sum)
-                    }.
-                    toSeq.
-                    distinct
-
-            val matchCnt = matchesNorm.size
-
-            // TODO:matchesNorm
-            // Add notes for all remaining (non-intersecting) matches.
-            for ((m, idx) ← matches.zipWithIndex) {
-                if (DEEP_DEBUG)
-                    logger.trace(
-                        s"Model '${mdl.model.getId}' element found (${idx + 1} of $matchCnt) [" +
-                            s"elementId=${m.element.getId}, " +
-                            s"synonym=${m.synonym}, " +
-                            s"tokens=${tokString(m.tokens)}" +
-                            s"]"
-                    )
-
-                val elm = m.element
-                val syn = m.synonym
+                            // 1.2 Sparse.
+                            if (simpleEnabled && !sparseProc && !found)
+                                for (s ← get(mdl.sparseSynonyms, elemId) if !found)
+                                    s.trySparseMatch(toks) match {
+                                        case Some(res) ⇒ add("sparse simple", elm, cacheSparse, res, tokIdxs, s, Seq.empty)
+                                        case None ⇒ // No-op.
+                                    }
+
+                            // 2. DSL.
+                            found = false
+                            val dslEnabled = state != SIMPLE
+
+                            // 2.1 Direct.
+                            if (dslEnabled && mdl.directSynonymsDsl.nonEmpty && !directProc && !found)
+                                for (s ← get(mdl.directSynonymsDsl, elemId); comb ← dslCombs if !found) {
+                                    if (s.isMatch(comb.map(_.data), req)) {
+                                        println(s"OK $elemId for s=$s for toks:${toks.map(_.origText)}")
+
+                                        add("direct DSL", elm, cacheDirect, toks, tokIdxs, s, getPartsComplex(comb, s))
+                                    }
+                                    else
+                                        println(s"NOT OK $elemId for s=$s for toks:${toks.map(_.origText)}")
+                                }
 
-                val tokIdxs = m.tokens.map(_.index)
-                val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
+                            // 2.2 Sparse.
+                            if (dslEnabled && mdl.sparseSynonymsDsl.nonEmpty && !sparseProc && !found)
+                                for (s ← get(mdl.sparseSynonymsDsl, elemId); comb ← dslCombs if !found)
+                                    s.trySparseMatch(comb.map(_.data), req) match {
+                                        case Some(res) ⇒ add("sparse DSL", elm, cacheSparse, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                        case None ⇒ // No-op.
+                                    }
+                        }
+                    }
 
-                // TODO:
-                if (!alreadyMarked(m.tokens, elm.getId))
-                    mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
-            }
+                    processMatches(ns, mdlId, matches)
+                }
 
-            val parsers = mdl.model.getParsers
-
-            for (parser ← parsers.asScala) {
-                parser.onInit()
-
-                startScopedSpan("customParser", span,
-                    "srvReqId" → ns.srvReqId,
-                    "mdlId" → mdl.model.getId,
-                    "txt" → ns.text) { _ ⇒
-                    def to(t: NlpToken): NCCustomWord =
-                        new NCCustomWord {
-                            override def getNormalizedText: String = t.normText
-                            override def getOriginalText: String = t.origText
-                            override def getStartCharIndex: Int = t.startCharIndex
-                            override def getEndCharIndex: Int = t.endCharIndex
-                            override def getPos: String = t.pos
-                            override def getPosDescription: String = t.posDesc
-                            override def getLemma: String = t.lemma
-                            override def getStem: String = t.stem
-                            override def isStopWord: Boolean = t.isStopWord
-                            override def isBracketed: Boolean = t.isBracketed
-                            override def isQuoted: Boolean = t.isQuoted
-                            override def isKnownWord: Boolean = t.isKnownWord
-                            override def isSwearWord: Boolean = t.isSwearWord
-                            override def isEnglish: Boolean = t.isEnglish
-                        }
+                go()
 
-                    val res = parser.parse(
-                        req,
-                        mdl.model,
-                        ns.map(to).asJava,
-                        ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
-                            val noteId = n.noteType
-                            val words = ns.filter(t ⇒ n.tokenIndexes.contains(t.index)).map(to).asJava
-                            val md = n.asMetadata()
-
-                            new NCCustomElement() {
-                                override def getElementId: String = noteId
-                                override def getWords: util.List[NCCustomWord] = words
-                                override def getMetadata: JavaMeta = md.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
-                            }
-                        }).asJava
-                    )
+                if (state == SIMPLE) {
+                    state = DSL_FIRST
 
-                    if (res != null)
-                        res.asScala.foreach(e ⇒ {
-                            val elemId = e.getElementId
-                            val words = e.getWords
+                    go()
+                }
 
-                            if (elemId == null)
-                                throw new NCE(s"Custom model parser cannot return 'null' element ID.")
 
-                            if (words == null || words.isEmpty)
-                                throw new NCE(s"Custom model parser cannot return empty custom tokens [elementId=$elemId]")
+            }
 
-                            val matchedToks = words.asScala.map(w ⇒
-                                ns.find(t ⇒
-                                    t.startCharIndex == w.getStartCharIndex && t.endCharIndex == w.getEndCharIndex
-                                ).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w"))
-                            )
 
-                            if (!alreadyMarked(matchedToks, elemId))
-                                mark(
-                                    ns,
-                                    elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
-                                    toks = matchedToks,
-                                    direct = true,
-                                    syn = None,
-                                    metaOpt = Some(e.getMetadata.asScala),
-                                    parts = Seq.empty
-                                )
-                        })
-                }
 
-                parser.onDiscard()
-            }
+            processParsers(mdl, ns, span, req)
         }
     }
 
     def isComplex(mdl: NCProbeModel): Boolean =
-        mdl.nonSparseSynonymsDsl.nonEmpty ||
+        mdl.directSynonymsDsl.nonEmpty ||
         mdl.sparseSynonymsDsl.nonEmpty ||
         !mdl.model.getParsers.isEmpty
 }
\ No newline at end of file
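
To follow the new SIMPLE / DSL_FIRST / DSL_NEXT flow above, here is a minimal sketch of the
dispatch with simplified stand-in types (the real code works over NCProbeModel and its element
map): on the first probe phase only elements without DSL synonyms are tried (SIMPLE), the pass
is then repeated for elements that do have DSL synonyms (DSL_FIRST), and later probe phases
start directly in DSL_NEXT, where simple matching is disabled.

    object TwoPassSketch extends App {
        sealed trait State
        case object SIMPLE extends State
        case object DSL_FIRST extends State
        case object DSL_NEXT extends State

        // Stand-in for a model element; `hasDslSynonyms` mirrors mdl.hasDslSynonyms(elemId).
        case class Elem(id: String, hasDslSynonyms: Boolean)

        val elems = Seq(Elem("e1", hasDslSynonyms = false), Elem("e2", hasDslSynonyms = true))

        def go(state: State): Unit =
            for (e ← elems) {
                val simpleEnabled = state match {
                    case SIMPLE ⇒ !e.hasDslSynonyms
                    case DSL_FIRST ⇒ e.hasDslSynonyms
                    case _ ⇒ false
                }
                val dslEnabled = state != SIMPLE

                println(s"state=$state, elem=${e.id}, simple=$simpleEnabled, dsl=$dslEnabled")
            }

        val firstProbePhase = true // Assumed for the example (ns.firstProbePhase in the real code).
        var state: State = if (firstProbePhase) SIMPLE else DSL_NEXT

        go(state)

        if (state == SIMPLE) {
            state = DSL_FIRST
            go(state)
        }
    }
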
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index fa9b3c7..00ee50f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -79,34 +79,34 @@ class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 {
     @Test
     def test2(): Unit =
         runBatch(
-            _ ⇒ checkExists(
-                "test tomorrow",
-                usr(text = "test tomorrow", id = "x1")
-            ),
-            _ ⇒ checkExists(
-                "tomorrow test",
-                usr(text = "tomorrow test", id = "x1")
-            ),
+//            _ ⇒ checkExists(
+//                "test tomorrow",
+//                usr(text = "test tomorrow", id = "x3")
+//            ),
+//            _ ⇒ checkExists(
+//                "tomorrow test",
+//                usr(text = "tomorrow test", id = "x3")
+//            ),
             _ ⇒ checkExists(
                 "test xxx tomorrow",
-                usr(text = "test tomorrow", id = "x1"),
+                usr(text = "test tomorrow", id = "x3"),
                 nlp(text = "xxx"),
             ),
-            _ ⇒ checkExists(
-                "y the y",
-                usr(text = "y y", id = "y3"),
-                nlp(text = "the", isStop = true)
-            ),
-            _ ⇒ checkExists(
-                "y xxx y",
-                usr(text = "y y", id = "y3"),
-                nlp(text = "xxx")
-            ),
-            _ ⇒ checkExists(
-                "aaa y xxx y",
-                nlp(text = "aaa"),
-                usr(text = "y y", id = "y3"),
-                nlp(text = "xxx")
-            )
+//            _ ⇒ checkExists(
+//                "y the y",
+//                usr(text = "y y", id = "y3"),
+//                nlp(text = "the", isStop = true)
+//            ),
+//            _ ⇒ checkExists(
+//                "y xxx y",
+//                usr(text = "y y", id = "y3"),
+//                nlp(text = "xxx")
+//            ),
+//            _ ⇒ checkExists(
+//                "aaa y xxx y",
+//                nlp(text = "aaa"),
+//                usr(text = "y y", id = "y3"),
+//                nlp(text = "xxx")
+//            )
         )
 }
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index ede9153..7b81473 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -27,8 +27,11 @@ import java.util
   * Nested Elements test model.
   */
 class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl", "Nested Test Model", "1.0") {
-    override def getElements: util.Set[NCElement] =
+    override def getElements: util.Set[NCElement] = {
+        // Note: this defines one simple synonym and one DSL synonym.
+        // Input like `10 word` should be caught by the longer (DSL) variant.
         Set(NCTestElement("e1", "{^^{tok_id() == 'nlpcraft:num'}^^|_} word"))
+    }
 
     @NCIntent("intent=onE1 term(t1)={tok_id() == 'e1'}")
     def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
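
For context on the element above: per the comment, 'e1' carries both a simple synonym (the bare
literal 'word') and a DSL one (an optional 'nlpcraft:num' token followed by 'word'). An input
such as '10 word' is therefore expected to collapse into a single 'e1' token that absorbs the
number, rather than matching the short variant and leaving a dangling 'nlpcraft:num' token.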

[incubator-nlpcraft] 06/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit c8e4898cabb96980ed9e9544b584a67a1ec78770
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Apr 8 19:00:18 2021 +0300

    WIP.
---
 .../model/NCEnricherNestedModelSpec.scala          | 48 +++++++++++-----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 00ee50f..91d037b 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -79,34 +79,34 @@ class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 {
     @Test
     def test2(): Unit =
         runBatch(
-//            _ ⇒ checkExists(
-//                "test tomorrow",
-//                usr(text = "test tomorrow", id = "x3")
-//            ),
-//            _ ⇒ checkExists(
-//                "tomorrow test",
-//                usr(text = "tomorrow test", id = "x3")
-//            ),
+            _ ⇒ checkExists(
+                "test tomorrow",
+                usr(text = "test tomorrow", id = "x3")
+            ),
+            _ ⇒ checkExists(
+                "tomorrow test",
+                usr(text = "tomorrow test", id = "x3")
+            ),
             _ ⇒ checkExists(
                 "test xxx tomorrow",
                 usr(text = "test tomorrow", id = "x3"),
                 nlp(text = "xxx"),
             ),
-//            _ ⇒ checkExists(
-//                "y the y",
-//                usr(text = "y y", id = "y3"),
-//                nlp(text = "the", isStop = true)
-//            ),
-//            _ ⇒ checkExists(
-//                "y xxx y",
-//                usr(text = "y y", id = "y3"),
-//                nlp(text = "xxx")
-//            ),
-//            _ ⇒ checkExists(
-//                "aaa y xxx y",
-//                nlp(text = "aaa"),
-//                usr(text = "y y", id = "y3"),
-//                nlp(text = "xxx")
-//            )
+            _ ⇒ checkExists(
+                "y the y",
+                usr(text = "y y", id = "y3"),
+                nlp(text = "the", isStop = true)
+            ),
+            _ ⇒ checkExists(
+                "y xxx y",
+                usr(text = "y y", id = "y3"),
+                nlp(text = "xxx")
+            ),
+            _ ⇒ checkExists(
+                "aaa y xxx y",
+                nlp(text = "aaa"),
+                usr(text = "y y", id = "y3"),
+                nlp(text = "xxx")
+            )
         )
 }
\ No newline at end of file

[incubator-nlpcraft] 07/07: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit bbeecf6ad83eb7941676e32298859989e0f89bc0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Apr 8 22:48:24 2021 +0300

    WIP.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 74 ++++++++++------------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 5169afe..0ec40cd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -26,15 +26,17 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKin
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCProbeSynonym ⇒ Synonym}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCProbeSynonym ⇒ Synonym}
 
 import java.io.Serializable
 import java.util
+import java.util.{List ⇒ JList}
 import scala.collection.JavaConverters._
 import scala.collection.convert.DecorateAsScala
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, mutable}
 
+
 /**
   * Model elements enricher.
   */
@@ -133,7 +135,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         element: NCElement,
         tokens: Seq[NlpToken],
         synonym: Synonym,
-        parts: Seq[TokType]
+        parts: Seq[TokType],
+        tokIdxs: Seq[Int]
     ) extends Ordered[ElementMatch] {
         // Tokens sparsity.
         lazy val sparsity: Int = U.calcSparsity(tokens.map(_.index))
@@ -203,6 +206,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param syn
       * @param metaOpt
       * @param parts
+      * @param toksIdxs
       */
     private def mark(
         ns: NCNlpSentence,
@@ -211,13 +215,18 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         direct: Boolean,
         syn: Option[Synonym],
         metaOpt: Option[Map[String, Object]],
-        parts: Seq[TokType]
+        parts: Seq[TokType],
+        toksIdxs: Seq[Int]
     ): Unit = {
         val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
 
         // For system elements.
         params += "direct" → direct.asInstanceOf[AnyRef]
 
+        val toksIdxsJava: JList[Int] = toksIdxs.asJava
+
+        params += "allToksIndexes" → toksIdxsJava
+
         syn match {
             case Some(s) ⇒
                 if (s.isValueSynonym)
@@ -302,7 +311,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                         new NCCustomElement() {
                             override def getElementId: String = noteId
-                            override def getWords: util.List[NCCustomWord] = words
+                            override def getWords: JList[NCCustomWord] = words
                             override def getMetadata: JavaMeta = md.map(p ⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
                         }
                     }).asJava
@@ -333,7 +342,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 direct = true,
                                 syn = None,
                                 metaOpt = Some(e.getMetadata.asScala),
-                                parts = Seq.empty
+                                parts = Seq.empty,
+                                toksIdxs = matchedToks.map(_.index)
                             )
                     })
             }
@@ -369,7 +379,17 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param toks
       * @param elemId
       */
-    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
+    private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean =
+        toks.forall(_.isTypeOf(elemId)) ||
+        toks.flatten.exists(n ⇒
+            n.noteType == elemId &&
+            (
+                n.dataOpt("allToksIndexes").asInstanceOf[Option[JList[Int]]] match {
+                    case Some(idxs) ⇒ idxs.asScala.containsSlice(toks.map(_.index))
+                    case None ⇒ false
+                }
+            )
+        )
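
With this change a candidate span counts as already marked either when each of its tokens is
already typed with the element, or when some existing note of that element recorded a token
index slice covering the candidate. A minimal sketch of the second (new) check, assuming a toy
Note type in place of the real sentence notes:

    object AlreadyMarkedSketch extends App {
        // Illustrative stand-in for a sentence note carrying "allToksIndexes".
        case class Note(elemId: String, allToksIndexes: Seq[Int])

        def alreadyMarked(notes: Seq[Note], elemId: String, candidateIdxs: Seq[Int]): Boolean =
            notes.exists(n ⇒ n.elemId == elemId && n.allToksIndexes.containsSlice(candidateIdxs))

        val notes = Seq(Note("e1", Seq(2, 3, 4)))

        println(alreadyMarked(notes, "e1", Seq(3, 4))) // true: covered by the existing note.
        println(alreadyMarked(notes, "e1", Seq(4, 5))) // false: index 5 is not covered.
        println(alreadyMarked(notes, "e2", Seq(3, 4))) // false: different element.
    }
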
 
     /**
       *
@@ -506,55 +526,29 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param matches
       */
     private def processMatches(ns: NCNlpSentence, mdlId: String, matches: Seq[ElementMatch]): Unit = {
-        // Scans by elements that are found with same tokens length.
-        // Inside, for each token we drop all non-optimal combinations.
-        // Example:
-        // 1. Element's synonym is 'a b' with isSparse 'true' and isPermuteSynonyms 'true'.
-        // 2. Request is 'a b a b'.
-        // Initially, matches are found at 0-1, 1-2, 2-3 and 0-3.
-        // 0-3 is dropped because, for tokens 0 and 3, better variants were found for the same element with the same token count.
-        val matchesNorm =
-            matches.
-                flatMap(m ⇒ m.tokens.map(_ → m)).
-                groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
-                flatMap { case (_, seq) ⇒
-                    // Optimize by sparsity sum across the token sets found for one element with the same token count.
-                    U.permute(
-                        seq.groupBy { case (tok, _) ⇒ tok }.
-                            map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ m }.toList }.toList
-                    ).minBy(_.map(_.sparsity).sum)
-                }.
-                toSeq.
-                distinct
-
-        val matchCnt = matchesNorm.size
-
         // TODO: matchesNorm
         // Add notes for all remaining (non-intersecting) matches.
         for ((m, idx) ← matches.zipWithIndex) {
             if (DEEP_DEBUG)
                 logger.trace(
-                    s"Model '$mdlId' element found (${idx + 1} of $matchCnt) [" +
+                    s"Model '$mdlId' element found (${idx + 1} of ${matches.size}) [" +
                         s"elementId=${m.element.getId}, " +
                         s"synonym=${m.synonym}, " +
                         s"tokens=${tokString(m.tokens)}" +
                         s"]"
                 )
 
-            val elm = m.element
-            val syn = m.synonym
-
             val tokIdxs = m.tokens.map(_.index)
-            val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
+            val direct = m.synonym.isDirect && (tokIdxs == tokIdxs.sorted)
 
             // TODO:
-            if (!alreadyMarked(m.tokens, elm.getId)) {
-                mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
+            if (!alreadyMarked(m.tokens, m.element.getId)) {
+                mark(ns, m.element, m.tokens, direct, syn = Some(m.synonym), metaOpt = None, parts = m.parts, toksIdxs = m.tokIdxs)
 
-                println(s"SET: ${elm.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
+                println(s"SET: ${m.element.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
             }
             else
-                println(s"NOT SET: ${elm.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
+                println(s"NOT SET: ${m.element.getId}, m.tokens=${m.tokens.map(_.origText).mkString("|")}")
         }
     }
 
@@ -571,7 +565,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
             startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒
                 var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT
+
                 ns.firstProbePhase = false
+
                 val combosToks = combos(ns)
 
                 def go(): Unit = {
@@ -590,7 +586,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                         var added = false
 
                         if (!matchExist(elm.getId, res)) {
-                            matches += ElementMatch(elm, res, s, parts)
+                            matches += ElementMatch(elm, res, s, parts, tokIdxs)
 
                             added = true
                         }