You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/23 15:31:32 UTC

[incubator-nlpcraft] branch NLPCRAFT-443-1 updated (4376e97 -> 12bfead)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 4376e97  WIP.
     new 26d4c40  WIP.
     new 12bfead  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  64 ++--
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |   2 +-
 .../probe/mgrs/synonyms/NCSynonymsManager.scala    |  80 +++--
 .../nlp/enrichers/NCServerEnrichmentManager.scala  |   2 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    | 338 ++++++++++-----------
 5 files changed, 266 insertions(+), 220 deletions(-)

[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 26d4c40438ef31e3cbc6a94020569f735ba7ed24
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 23 17:20:19 2021 +0300

    WIP.
---
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  64 ++--
 .../probe/mgrs/synonyms/NCSynonymsManager.scala    |  74 ++++-
 .../nlp/enrichers/NCServerEnrichmentManager.scala  |   2 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    | 338 ++++++++++-----------
 4 files changed, 264 insertions(+), 214 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index fb9a016..c8ef9ce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -133,35 +133,11 @@ class NCNlpSentenceNote(private val values: mutable.HashMap[String, JSerializabl
       * @return
       */
     def getKey(withIndexes: Boolean = true, withReferences: Boolean = true): Seq[Any] = {
-        def addRefs(names: String*): Seq[String] = if (withReferences) names else Seq.empty
-
-        val names: Seq[String] =
-            if (isUser)
-                Seq.empty
-            else
-                noteType match {
-                    case "nlpcraft:continent" => Seq("continent")
-                    case "nlpcraft:subcontinent" => Seq("continent", "subcontinent")
-                    case "nlpcraft:country" => Seq("continent", "subcontinent", "country")
-                    case "nlpcraft:region" => Seq("continent", "subcontinent", "country", "region")
-                    case "nlpcraft:city" => Seq("continent", "subcontinent", "country", "region", "city")
-                    case "nlpcraft:metro" => Seq("metro")
-                    case "nlpcraft:date" => Seq("from", "to")
-                    case "nlpcraft:relation" => Seq("type", "note") ++ addRefs("indexes")
-                    case "nlpcraft:sort" => Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes")
-                    case "nlpcraft:limit" => Seq("limit", "note") ++ addRefs("indexes", "asc") // Asc flag has sense only with references for limit.
-                    case "nlpcraft:coordinate" => Seq("latitude", "longitude")
-                    case "nlpcraft:num" => Seq("from", "to", "unit", "unitType")
-                    case x if x.startsWith("google:") => Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes")
-                    case x if x.startsWith("stanford:") => Seq("nne")
-                    case x if x.startsWith("opennlp:") => Seq.empty
-                    case x if x.startsWith("spacy:") => Seq("vector")
-
-                    case _ => throw new AssertionError(s"Unexpected note type: $noteType")
-                }
-
         val seq1 = if (withIndexes) Seq(wordIndexes, noteType) else Seq(noteType)
-        val seq2 = names.map(name => this.getOrElse(name, null))
+        val seq2 = if (isUser)
+            Seq.empty
+        else
+            getBuiltProperties(noteType, withReferences).map(name => this.getOrElse(name, null))
 
         seq1 ++ seq2
     }
@@ -284,4 +260,36 @@ object NCNlpSentenceNote {
      */
     def apply(indexes: mutable.Seq[Int], wordIndexes: mutable.Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
         apply(indexes.toSeq, Some(wordIndexes.toSeq), typ, params.toMap)
+
+    /**
+      *
+      * @param noteType
+      * @param withReferences
+      */
+    def getBuiltProperties(noteType: String, withReferences: Boolean = true): Seq[String] = {
+        def addRefs(names: String*): Seq[String] = if (withReferences) names else Seq.empty
+
+        noteType match {
+            case "nlpcraft:nlp" => Seq.empty
+
+            case "nlpcraft:continent" => Seq("continent")
+            case "nlpcraft:subcontinent" => Seq("continent", "subcontinent")
+            case "nlpcraft:country" => Seq("continent", "subcontinent", "country")
+            case "nlpcraft:region" => Seq("continent", "subcontinent", "country", "region")
+            case "nlpcraft:city" => Seq("continent", "subcontinent", "country", "region", "city")
+            case "nlpcraft:metro" => Seq("metro")
+            case "nlpcraft:date" => Seq("from", "to")
+            case "nlpcraft:relation" => Seq("type", "note") ++ addRefs("indexes")
+            case "nlpcraft:sort" => Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes")
+            case "nlpcraft:limit" => Seq("limit", "note") ++ addRefs("indexes", "asc") // Asc flag has sense only with references for limit.
+            case "nlpcraft:coordinate" => Seq("latitude", "longitude")
+            case "nlpcraft:num" => Seq("from", "to", "unit", "unitType")
+            case x if x.startsWith("google:") => Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes")
+            case x if x.startsWith("stanford:") => Seq("nne")
+            case x if x.startsWith("opennlp:") => Seq.empty
+            case x if x.startsWith("spacy:") => Seq("vector")
+
+            case _ => throw new AssertionError(s"Unexpected note type: $noteType")
+        }
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index e9bf751..5386c83 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -18,25 +18,44 @@
 package org.apache.nlpcraft.probe.mgrs.synonyms
 
 import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceNote, NCNlpSentenceToken}
 import org.apache.nlpcraft.common.{NCService, U}
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
 import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, NCSynonymChunkKind, REGEX, TEXT}
 import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonymChunk, NCProbeSynonym => Synonym}
 
 import scala.collection.mutable
+import scala.compat.java8.OptionConverters._
 import scala.jdk.CollectionConverters.ListHasAsScala
 
 /**
   *
   */
 object NCSynonymsManager extends NCService {
-    case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction)
+    case class Key(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef])
+
+    object Key {
+        def apply(t: NCToken): Key = {
+            val m =
+                if (t.isUserDefined)
+                    Map.empty[String, AnyRef]
+                else
+                    NCNlpSentenceNote.getBuiltProperties(t.getId).flatMap(p => t.metaOpt(p).asScala match {
+                        case Some(v) => Some(p -> v)
+                        case None => None
+                    }).toMap
+
+            Key(t.getId, t.getStartCharIndex, t.getEndCharIndex, m)
+        }
+    }
+
+    case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
+        override def toString: String = variants.toString()
+    }
 
-    // TODO: NCToken is not suitable key
-    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[NCToken, Value]]
+    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[Key, mutable.ArrayBuffer[Value]]]
 
     override def start(parent: Span): NCService = {
         ackStarting()
@@ -143,8 +162,12 @@ object NCSynonymsManager extends NCService {
       * @param pred
       * @param variantsToks
       */
-    private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit =
-        idlCache.getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty) += tok -> Value(req, variantsToks, pred)
+    private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit = {
+        idlCache.
+            getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty).
+            getOrElseUpdate(Key(tok), mutable.ArrayBuffer.empty) +=
+            Value(req, variantsToks, pred)
+    }
 
     /**
       *
@@ -267,10 +290,10 @@ object NCSynonymsManager extends NCService {
         idlCache.get(srvReqId) match {
             case Some(m) =>
                 lazy val allCheckedSenToks = {
-                    val set = mutable.HashSet.empty[NCToken]
+                    val set = mutable.ArrayBuffer.empty[Key]
 
                     def add(t: NCToken): Unit = {
-                        set += t
+                        set += Key(t)
 
                         t.getPartTokens.asScala.foreach(add)
                     }
@@ -281,17 +304,36 @@ object NCSynonymsManager extends NCService {
                 }
 
                 sen.forall(tok =>
-                    m.get(tok) match {
-                        case Some(v) =>
-                            v.variants.exists(winHistVariant =>
-                                v.predicate.apply(
-                                    tok, NCIdlContext(toks = winHistVariant, req = v.request)
-                                ).value.asInstanceOf[Boolean] &&
-                                winHistVariant.forall(allCheckedSenToks.contains)
+                    m.get(Key(tok)) match {
+                        case Some(vals) =>
+                            vals.exists(
+                                v => {
+                                    val x = v.variants.exists(winHistVariant =>
+                                        v.predicate.apply(
+                                            tok, NCIdlContext(toks = winHistVariant, req = v.request)
+                                        ).value.asInstanceOf[Boolean] &&
+                                            winHistVariant.map(Key(_)).forall(allCheckedSenToks.contains)
+                                    )
+
+//                                    if (sen.map(_.getNormalizedText).mkString(" ") == "sort A by A".toLowerCase) {
+//
+//
+//                                        println("!!!CHECK =variants=\n" + v.variants.map(_.mkString("\n")).mkString("\n"))
+//                                        println("!!!CHECK =variantsKeys=\n" + v.variants.map(_.map(Key(_)).mkString("\n")).mkString("\n"))
+//
+//                                        println("!!!CHECK x=" + x)
+//
+//                                        println("+++++")
+//                                    }
+
+                                    x
+                                }
                             )
 
+
                         case None => true
                     })
+
             case None => true
         }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 2f457cb..5d43b7b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -156,7 +156,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
                         if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
                             prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
 
-                            h.sentence.copy(Some(U.genGuid()))
+                            h.sentence.copy(Some(srvReqId))
                         }
                         else
                             process(srvReqId, normTxt, enabledBuiltInToks, span)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 7b8d858..d19de00 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -50,11 +50,11 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
     @Test
     def test(): Unit =
         runBatch(
-            _ => checkExists(
-                "sort A",
-                srt(text = "sort", typ = SUBJ_ONLY, note = "A", index = 1),
-                usr("A", "A")
-            ),
+//            _ => checkExists(
+//                "sort A",
+//                srt(text = "sort", typ = SUBJ_ONLY, note = "A", index = 1),
+//                usr("A", "A")
+//            ),
             _ => checkExists(
                 "sort A by A",
                 srt(text = "sort", subjNote = "A", subjIndex = 1, byNote = "A", byIndex = 3),
@@ -62,169 +62,169 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
                 nlp(text = "by", isStop = true),
                 usr(text = "A", id = "A")
             ),
-            _ => checkExists(
-                "sort A, C by A, C",
-                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 3), byNotes = Seq("A", "C"), byIndexes = Seq(5, 7)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "C", id = "C"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "C", id = "C")
-            ),
-            _ => checkExists(
-                "sort A C by A C",
-                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "C"), byIndexes = Seq(4, 5)),
-                usr(text = "A", id = "A"),
-                usr(text = "C", id = "C"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                usr(text = "C", id = "C")
-            ),
-            _ => checkExists(
-                "sort A B by A B",
-                srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
-                usr(text = "A", id = "A"),
-                usr(text = "B", id = "B"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A", id = "A"),
-                usr(text = "B", id = "B")
-            ),
-            _ => checkExists(
-                "sort A B by A B",
-                srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote = "AB", byIndex = 3),
-                usr(text = "A B", id = "AB"),
-                nlp(text = "by", isStop = true),
-                usr(text = "A B", id = "AB")
-            ),
-            _ => checkExists(
-                "A classify",
-                usr(text = "A", id = "A"),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
-            ),
-            _ => checkExists(
-                "the A the classify",
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the", isStop = true),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 1)
-            ),
-            _ => checkExists(
-                "segment A by top down",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = false),
-                usr(text = "A", id = "A"),
-                nlp(text = "by top down", isStop = true)
-            ),
-            _ => checkExists(
-                "segment A in bottom up order",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "in bottom up order", isStop = true)
-            ),
-            // `by` is redundant word here
-            _ => checkExists(
-                "segment A by in bottom up order",
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1),
-                usr(text = "A", id = "A"),
-                nlp(text = "by"),
-                nlp(text = "in"),
-                nlp(text = "bottom"),
-                nlp(text = "up"),
-                nlp(text = "order")
-            ),
-            _ => checkExists(
-                "the segment the A the in bottom up the order the",
-                nlp(text = "the", isStop = true),
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the in bottom up the order the", isStop = true)
-            ),
-            _ => checkExists(
-                "the segment the A the by bottom up the order the",
-                nlp(text = "the", isStop = true),
-                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
-                nlp(text = "the", isStop = true),
-                usr(text = "A", id = "A"),
-                nlp(text = "the by bottom up the order the", isStop = true)
-            ),
-            _ => checkExists(
-                "A classify",
-                usr(text = "A", id = "A"),
-                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
-            ),
-            _ => checkAll(
-                "A B classify",
-                Seq(
-                    usr(text = "A B", id = "AB"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "AB", index = 0)
-                ),
-                Seq(
-                    usr(text = "A", id = "A"),
-                    usr(text = "B", id = "B"),
-                    srt(text = "classify", subjNotes = Seq("A", "B"), subjIndexes = Seq(0, 1))
-                ),
-                Seq(
-                    usr(text = "A", id = "A"),
-                    usr(text = "B", id = "B"),
-                    srt(text = "classify", subjNotes = Seq("B"), subjIndexes = Seq(1))
-                )
-            ),
-            _ => checkAll(
-                "D classify",
-                Seq(
-                    usr(text = "D", id = "D1"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "D1", index = 0)
-                ),
-                Seq(
-                    usr(text = "D", id = "D2"),
-                    srt(text = "classify", typ = SUBJ_ONLY, note = "D2", index = 0)
-                )
-            ),
-            _ => checkAll(
-                "sort by A",
-                Seq(
-                    srt(text = "sort by", typ = BY_ONLY, note = "A", index = 1),
-                    usr(text = "A", id = "A")
-                )
-            ),
-            _ => checkExists(
-                "organize by A, B top down",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "top down", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B from bottom up order",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "from bottom up order", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B the descending",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = "the descending", isStop = true)
-            ),
-            _ => checkExists(
-                "organize by A, B, asc",
-                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
-                usr(text = "A", id = "A"),
-                nlp(text = ",", isStop = true),
-                usr(text = "B", id = "B"),
-                nlp(text = ", asc", isStop = true)
-            ),
-            _ => checkExists(
-                "sort A the A the A",
-                srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1),
-                usr("A the A the A", "wrapperA")
-            )
+//            _ => checkExists(
+//                "sort A, C by A, C",
+//                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 3), byNotes = Seq("A", "C"), byIndexes = Seq(5, 7)),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "C", id = "C"),
+//                nlp(text = "by", isStop = true),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "C", id = "C")
+//            ),
+//            _ => checkExists(
+//                "sort A C by A C",
+//                srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "C"), byIndexes = Seq(4, 5)),
+//                usr(text = "A", id = "A"),
+//                usr(text = "C", id = "C"),
+//                nlp(text = "by", isStop = true),
+//                usr(text = "A", id = "A"),
+//                usr(text = "C", id = "C")
+//            ),
+//            _ => checkExists(
+//                "sort A B by A B",
+//                srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
+//                usr(text = "A", id = "A"),
+//                usr(text = "B", id = "B"),
+//                nlp(text = "by", isStop = true),
+//                usr(text = "A", id = "A"),
+//                usr(text = "B", id = "B")
+//            ),
+//            _ => checkExists(
+//                "sort A B by A B",
+//                srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote = "AB", byIndex = 3),
+//                usr(text = "A B", id = "AB"),
+//                nlp(text = "by", isStop = true),
+//                usr(text = "A B", id = "AB")
+//            ),
+//            _ => checkExists(
+//                "A classify",
+//                usr(text = "A", id = "A"),
+//                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
+//            ),
+//            _ => checkExists(
+//                "the A the classify",
+//                nlp(text = "the", isStop = true),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "the", isStop = true),
+//                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 1)
+//            ),
+//            _ => checkExists(
+//                "segment A by top down",
+//                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = false),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "by top down", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "segment A in bottom up order",
+//                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1, asc = true),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "in bottom up order", isStop = true)
+//            ),
+//            // `by` is redundant word here
+//            _ => checkExists(
+//                "segment A by in bottom up order",
+//                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "by"),
+//                nlp(text = "in"),
+//                nlp(text = "bottom"),
+//                nlp(text = "up"),
+//                nlp(text = "order")
+//            ),
+//            _ => checkExists(
+//                "the segment the A the in bottom up the order the",
+//                nlp(text = "the", isStop = true),
+//                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
+//                nlp(text = "the", isStop = true),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "the in bottom up the order the", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "the segment the A the by bottom up the order the",
+//                nlp(text = "the", isStop = true),
+//                srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3, asc = true),
+//                nlp(text = "the", isStop = true),
+//                usr(text = "A", id = "A"),
+//                nlp(text = "the by bottom up the order the", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "A classify",
+//                usr(text = "A", id = "A"),
+//                srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
+//            ),
+//            _ => checkAll(
+//                "A B classify",
+//                Seq(
+//                    usr(text = "A B", id = "AB"),
+//                    srt(text = "classify", typ = SUBJ_ONLY, note = "AB", index = 0)
+//                ),
+//                Seq(
+//                    usr(text = "A", id = "A"),
+//                    usr(text = "B", id = "B"),
+//                    srt(text = "classify", subjNotes = Seq("A", "B"), subjIndexes = Seq(0, 1))
+//                ),
+//                Seq(
+//                    usr(text = "A", id = "A"),
+//                    usr(text = "B", id = "B"),
+//                    srt(text = "classify", subjNotes = Seq("B"), subjIndexes = Seq(1))
+//                )
+//            ),
+//            _ => checkAll(
+//                "D classify",
+//                Seq(
+//                    usr(text = "D", id = "D1"),
+//                    srt(text = "classify", typ = SUBJ_ONLY, note = "D1", index = 0)
+//                ),
+//                Seq(
+//                    usr(text = "D", id = "D2"),
+//                    srt(text = "classify", typ = SUBJ_ONLY, note = "D2", index = 0)
+//                )
+//            ),
+//            _ => checkAll(
+//                "sort by A",
+//                Seq(
+//                    srt(text = "sort by", typ = BY_ONLY, note = "A", index = 1),
+//                    usr(text = "A", id = "A")
+//                )
+//            ),
+//            _ => checkExists(
+//                "organize by A, B top down",
+//                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "B", id = "B"),
+//                nlp(text = "top down", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "organize by A, B from bottom up order",
+//                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "B", id = "B"),
+//                nlp(text = "from bottom up order", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "organize by A, B the descending",
+//                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(false)),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "B", id = "B"),
+//                nlp(text = "the descending", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "organize by A, B, asc",
+//                srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes = Seq(1, 3), asc = Some(true)),
+//                usr(text = "A", id = "A"),
+//                nlp(text = ",", isStop = true),
+//                usr(text = "B", id = "B"),
+//                nlp(text = ", asc", isStop = true)
+//            ),
+//            _ => checkExists(
+//                "sort A the A the A",
+//                srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1),
+//                usr("A the A the A", "wrapperA")
+//            )
         )
 }

[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 12bfeadcbeabdaf6af1edf740d97ea6cef7684ba
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 23 18:31:18 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |  2 +-
 .../probe/mgrs/synonyms/NCSynonymsManager.scala    | 64 ++++++++++------------
 2 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index 8aced5f..8bc0113 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -268,7 +268,7 @@ object NCProbeVariants {
                 for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
                     process(tok, tokNlp)
 
-                ok = ok && NCSynonymsManager.isStillValid(srvReqId, toks.toSeq)
+                ok = ok && NCSynonymsManager.isStillValid(srvReqId, toks.toSeq, nlpSen)
 
                 if (ok) Some(new NCVariantImpl(toks.asJava)) else None
             })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index 5386c83..25987dd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -18,7 +18,7 @@
 package org.apache.nlpcraft.probe.mgrs.synonyms
 
 import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceNote, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
 import org.apache.nlpcraft.common.{NCService, U}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
@@ -34,21 +34,22 @@ import scala.jdk.CollectionConverters.ListHasAsScala
   *
   */
 object NCSynonymsManager extends NCService {
-    case class Key(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef])
+    case class Key(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
 
     object Key {
-        def apply(t: NCToken): Key = {
-            val m =
-                if (t.isUserDefined)
-                    Map.empty[String, AnyRef]
-                else
+        def apply(t: NCToken): Key =
+            if (t.isUserDefined)
+                Key(t.getId, t.getStartCharIndex, t.getEndCharIndex)
+            else
+                Key(
+                    t.getId,
+                    t.getStartCharIndex,
+                    t.getEndCharIndex,
                     NCNlpSentenceNote.getBuiltProperties(t.getId).flatMap(p => t.metaOpt(p).asScala match {
                         case Some(v) => Some(p -> v)
                         case None => None
                     }).toMap
-
-            Key(t.getId, t.getStartCharIndex, t.getEndCharIndex, m)
-        }
+                )
     }
 
     case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
@@ -283,14 +284,14 @@ object NCSynonymsManager extends NCService {
     /**
       *
       * @param srvReqId
-      * @param sen
-      * @return
+      * @param senToks
+      * @param nlpSen
       */
-    def isStillValid(srvReqId: String, sen: Seq[NCToken]): Boolean =
+    def isStillValid(srvReqId: String, senToks: Seq[NCToken], nlpSen: NCNlpSentence): Boolean =
         idlCache.get(srvReqId) match {
             case Some(m) =>
                 lazy val allCheckedSenToks = {
-                    val set = mutable.ArrayBuffer.empty[Key]
+                    val set = mutable.HashSet.empty[Key]
 
                     def add(t: NCToken): Unit = {
                         set += Key(t)
@@ -298,39 +299,34 @@ object NCSynonymsManager extends NCService {
                         t.getPartTokens.asScala.foreach(add)
                     }
 
-                    sen.foreach(add)
+                    senToks.foreach(add)
+
+                    set ++=
+                        senToks.flatMap(t =>
+                            nlpSen.getInitialNlpNote(t.getStartCharIndex, t.getEndCharIndex) match {
+                                case Some(nlpNote) => Some(Key(nlpNote.noteType, t.getStartCharIndex, t.getEndCharIndex))
+                                case None => None
+                            }
+                        )
 
                     set
                 }
 
-                sen.forall(tok =>
+                senToks.forall(tok =>
                     m.get(Key(tok)) match {
                         case Some(vals) =>
                             vals.exists(
-                                v => {
-                                    val x = v.variants.exists(winHistVariant =>
+                                v =>
+                                    v.variants.exists(winHistVariant =>
                                         v.predicate.apply(
                                             tok, NCIdlContext(toks = winHistVariant, req = v.request)
                                         ).value.asInstanceOf[Boolean] &&
-                                            winHistVariant.map(Key(_)).forall(allCheckedSenToks.contains)
+                                            winHistVariant.map(Key(_)).forall(
+                                                allCheckedSenToks.contains
+                                            )
                                     )
-
-//                                    if (sen.map(_.getNormalizedText).mkString(" ") == "sort A by A".toLowerCase) {
-//
-//
-//                                        println("!!!CHECK =variants=\n" + v.variants.map(_.mkString("\n")).mkString("\n"))
-//                                        println("!!!CHECK =variantsKeys=\n" + v.variants.map(_.map(Key(_)).mkString("\n")).mkString("\n"))
-//
-//                                        println("!!!CHECK x=" + x)
-//
-//                                        println("+++++")
-//                                    }
-
-                                    x
-                                }
                             )
 
-
                         case None => true
                     })