You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/23 05:54:47 UTC
[incubator-nlpcraft] 02/02: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 267e82f5f9b888c5080e272fdd23d8db4e600aeb
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 23 08:54:32 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 12 ++++
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |  9 +--
 .../nlpcraft/common/nlp/NCNlpSentenceToken.scala   | 12 +---
 .../org/apache/nlpcraft/probe/NCProbeBoot.scala    |  3 +-
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |  4 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |  9 +--
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  9 +--
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  1 -
 .../{sentence => synonyms}/NCSynonymsManager.scala | 69 ++++++++++++----------
 .../nlp/enrichers/NCServerEnrichmentManager.scala  |  4 +-
 10 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 0f0b462..40f5da6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -74,6 +74,18 @@ class NCNlpSentence(
             firstProbePhase = firstProbePhase
         )
 
+    def copy(srvReqId: Option[String]): NCNlpSentence =
+        new NCNlpSentence(
+            srvReqId = srvReqId.getOrElse(this.srvReqId),
+            text = this.text,
+            enabledBuiltInToks = this.enabledBuiltInToks,
+            tokens = this.tokens,
+            deletedNotes = this.deletedNotes,
+            initNlpNotes = this.initNlpNotes,
+            nlpTokens = this.nlpTokens,
+            firstProbePhase = this.firstProbePhase
+        )
+
     /**
       * Utility method that gets set of notes for given note type collected from
       * tokens in this sentence. Notes are sorted in the same order they appear
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 63ae6ca..c457aa7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -77,14 +77,11 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
             indexes,
             Some(wordIndexes),
             noteType,
-            values.filter(p => !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
+            dataWithoutIndexes.toSeq ++ params:_*
         )
 
-    override def clone(): NCNlpSentenceNote = {
-        val m = mutable.Map.empty[String, JSerializable] ++ values
-
-        new NCNlpSentenceNote(m.toMap)
-    }
+    override def clone(): NCNlpSentenceNote =
+        new NCNlpSentenceNote((mutable.HashMap.empty[String, JSerializable] ++ values).toMap)
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index 4b94b98..fa9cbe6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -67,17 +67,7 @@ case class NCNlpSentenceToken(
       * Shallow copy.
       */
     def clone(index: Int): NCNlpSentenceToken =
-        NCNlpSentenceToken(
-            index,
-            {
-                val m = mutable.HashSet.empty[NCNlpSentenceNote]
-
-                notes.foreach(n => m += n.clone())
-
-                m
-            },
-            stopsReasons.clone()
-        )
+        NCNlpSentenceToken(index, mutable.HashSet.empty[NCNlpSentenceNote]  ++ notes.clone(), stopsReasons.clone())
 
     /**
       * Clones note.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index 4df9f53..561860f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,7 +49,8 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
 
 import java.io._
 import java.util.concurrent.CompletableFuture
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index e876065..0596783 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence => NlpSentence, NCNlpSenten
 import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
 import org.apache.nlpcraft.model.NCVariant
 import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger, NCVariantImpl}
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
 
 import java.io.{Serializable => JSerializable}
 import java.util
@@ -268,7 +268,7 @@ object NCProbeVariants {
                 for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
                     process(tok, tokNlp)
 
-                ok = ok  && NCSynonymsManager.isStillValid(srvReqId, toks.toSeq)
+                ok = ok && (!lastPhase || NCSynonymsManager.isStillValid(srvReqId, toks.toSeq))
 
                 if (ok) Some(new NCVariantImpl(toks.asJava)) else None
             })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 64049ac..20dc64d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,7 +43,8 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl._
 import org.apache.nlpcraft.probe.mgrs.nlp.validate._
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
 import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
 
 import java.io.Serializable
@@ -294,6 +295,9 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
         ): Unit = {
             require(errMsg.isDefined || (resType.isDefined && resBody.isDefined))
 
+            NCSentenceManager.clearRequestData(srvReqId)
+            NCSynonymsManager.clearRequestData(srvReqId)
+
             val msg = NCProbeMessage(msgName)
 
             msg.addData("srvReqId", srvReqId)
@@ -554,9 +558,6 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
 
         var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq, lastPhase = true)
 
-        NCSentenceManager.clearRequestData(srvReqId)
-        NCSynonymsManager.clearRequestData(srvReqId)
-
         // Sentence variants can be filtered by model.
         val fltSenVars: Seq[(NCVariant, Int)] =
             senVars.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 03c5b5d..c5ca532 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -27,7 +27,8 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
 import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
 
 import java.io.Serializable
@@ -535,11 +536,11 @@ object NCModelEnricher extends NCProbeEnricher {
                         p.token
                     else {
                         // TODO: everywhere
-                        val clone = p.word.clone()
+                        val notes = mutable.HashSet.empty[NlpNote]
 
-                        clone.filter(!_.isNlp).foreach(clone.remove)
+                        notes += p.word.getNlpNote
 
-                        NCTokenImpl(mdl, ns.srvReqId, clone)
+                        NCTokenImpl(mdl, ns.srvReqId, NlpToken(p.word.index, notes, p.word.stopsReasons))
                     }))
 
             def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 2e280ac..34c3f87 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -24,7 +24,6 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
 import org.apache.nlpcraft.common.{NCE, NCService, U, _}
 import org.apache.nlpcraft.model.NCModel
 import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager.{idlCache, reqCache}
 
 import java.io.{Serializable => JSerializable}
 import java.util
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
similarity index 85%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index cf5eb5d..e9bf751 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -15,33 +15,28 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.probe.mgrs.sentence
+package org.apache.nlpcraft.probe.mgrs.synonyms
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
 import org.apache.nlpcraft.common.{NCService, U}
-import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, NCSynonymChunkKind, REGEX, TEXT}
 import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonymChunk, NCProbeSynonym => Synonym}
 
 import scala.collection.mutable
+import scala.jdk.CollectionConverters.ListHasAsScala
 
 /**
   *
   */
 object NCSynonymsManager extends NCService {
-    case class Key(token: NCToken) {
-        // NCToken hashCode and equals based on indexes. // TODO: check it!
-        override def hashCode(): Int = U.mkJavaHash(token.getId, token)
-        override def equals(obj: Any): Boolean = obj match {
-            case key: Key => key.token.getId == token.getId && key.token == token
-        }
-    }
     case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction)
 
-    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[Key, Value]]
+    // TODO: NCToken is not a suitable key.
+    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[NCToken, Value]]
 
     override def start(parent: Span): NCService = {
         ackStarting()
@@ -149,8 +144,7 @@ object NCSynonymsManager extends NCService {
       * @param variantsToks
       */
     private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit =
-        idlCache.getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty) +=
-            Key(tok) -> Value(req, variantsToks, pred)
+        idlCache.getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty) += tok -> Value(req, variantsToks, pred)
 
     /**
       *
@@ -163,7 +157,8 @@ object NCSynonymsManager extends NCService {
         tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
     ): Boolean = {
         def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
-            if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
+            if (tow.isLeft) fromToken(tow.swap.toOption.get)
+            else fromWord(tow.toOption.get)
 
         chunk.kind match {
             case TEXT => chunk.wordStem == get0(_.stem, _.stem)
@@ -217,7 +212,7 @@ object NCSynonymsManager extends NCService {
       * @param req
       * @param variantsToks
       */
-    def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean= {
+    def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean = {
         require(tows != null)
 
         if (tows.length == s.length && tows.count(_.isLeft) >= s.idlChunks)
@@ -256,7 +251,8 @@ object NCSynonymsManager extends NCService {
             s,
             tows,
             (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
-            (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
+            (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex
+            else t.toOption.get.startCharIndex,
             shouldBeNeighbors = !s.sparse
         )
     }
@@ -264,31 +260,40 @@ object NCSynonymsManager extends NCService {
     /**
       *
       * @param srvReqId
-      * @param toks
+      * @param sen
       * @return
       */
-    def isStillValid(srvReqId: String, toks: Seq[NCToken]): Boolean =
-        toks.forall(tok =>
-            idlCache.get(srvReqId) match {
-                case Some(m) =>
-                    m.get(Key(tok)) match {
-                        case Some(v) =>
+    def isStillValid(srvReqId: String, sen: Seq[NCToken]): Boolean =
+        idlCache.get(srvReqId) match {
+            case Some(m) =>
+                lazy val allCheckedSenToks = {
+                    val set = mutable.HashSet.empty[NCToken]
 
+                    def add(t: NCToken): Unit = {
+                        set += t
 
-                            val x =
-                                v.predicate.apply(
-                                    tok, NCIdlContext(req = v.request, toks = toks)
-                                ).value.asInstanceOf[Boolean]
+                        t.getPartTokens.asScala.foreach(add)
+                    }
 
+                    sen.foreach(add)
 
-                            if (!x)
-                                println("x="+x + ", t=" + tok  + ", toks=" + toks)
+                    set
+                }
+
+                sen.forall(tok =>
+                    m.get(tok) match {
+                        case Some(v) =>
+                            v.variants.exists(winHistVariant =>
+                                v.predicate.apply(
+                                    tok, NCIdlContext(toks = winHistVariant, req = v.request)
+                                ).value.asInstanceOf[Boolean] &&
+                                winHistVariant.forall(allCheckedSenToks.contains)
+                            )
 
-                            x
                         case None => true
-                    }
-                case None => true
-            })
+                    })
+            case None => true
+        }
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 03b749f..2f457cb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -156,7 +156,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
                         if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
                             prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
 
-                            h.sentence
+                            h.sentence.copy(Some(U.genGuid()))
                         }
                         else
                             process(srvReqId, normTxt, enabledBuiltInToks, span)
@@ -224,7 +224,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
                 .getNotes(hdr.noteType)
                 .filter(_.contains(hdr.noteName))
                 .map(note => {
-                    val s = note(hdr.noteName).toString()
+                    val s = note(hdr.noteName).toString
                     if (isStopWord) s"${r(s)}" else s
                 })
                 .toSeq