You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/21 09:23:05 UTC

[incubator-nlpcraft] branch NLPCRAFT-443-1 created (now 312fabf)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 312fabf  WIP.

This branch includes the following new commits:

     new 312fabf  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 312fabf6420cfffcb5e89f1a432d45f198719a20
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Sep 21 12:22:57 2021 +0300

    WIP.
---
 .../cargps/src/main/resources/cargps_model.yaml    |   2 +-
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  51 +++++++---
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |  30 ++++++
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   4 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  20 +++-
 .../mgrs/nlp/enrichers/model/NCSentenceCache.scala | 110 +++++++++++++++++++++
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  40 +++++++-
 7 files changed, 238 insertions(+), 19 deletions(-)

diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
index cd5fb4e..62f45c8 100644
--- a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
@@ -60,7 +60,7 @@ elements:
   - id: "x:addr:st"
     greedy: false
     synonyms:
-      - "{//[a-zA-Z0-9]+//}[1,3]"
+      - "{^^{is_alphanum(tok_txt) && tok_is_between_ids('x:addr:num', 'x:addr:kind') == true}^^}[1,3]"
 
   - id: "x:addr"
     synonyms:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 809c4e5..e324857 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -20,9 +20,10 @@ package org.apache.nlpcraft.probe.mgrs
 import org.apache.nlpcraft.common.U
 import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
 import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.intent.NCIdlContext
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.{NCIdlContent, saveIdl}
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 
 import scala.collection.mutable
 
@@ -146,8 +147,11 @@ class NCProbeSynonym(
       * @param tow
       * @param chunk
       * @param req
+      * @param variantsToks
       */
-    private def isMatch(tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest): Boolean = {
+    private def isMatch(
+        tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
+    ): Boolean = {
         def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
             if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
 
@@ -160,7 +164,20 @@ class NCProbeSynonym(
                 r.matcher(get0(_.origText, _.origText)).matches() || r.matcher(get0(_.normText, _.normText)).matches()
 
             case IDL =>
-                get0(t => chunk.idlPred.apply(t, NCIdlContext(req = req)).value.asInstanceOf[Boolean], _ => false)
+                val ok =
+                    variantsToks.exists(variantToks =>
+                        get0(t =>
+                            chunk.idlPred.apply(
+                                t,
+                                NCIdlContext(req = req, toks = variantToks)
+                            ).value.asInstanceOf[Boolean], _ => false
+                        )
+                    )
+
+                if (ok)
+                    saveIdl(req, tow.swap.toOption.get, chunk.idlPred)
+
+                ok
 
             case _ => throw new AssertionError()
         }
@@ -188,17 +205,20 @@ class NCProbeSynonym(
       *
       * @param tows
       * @param req
+      * @param variantsToks
       * @return
       */
-    def isMatch(tows: Seq[NCIdlContent], req: NCRequest): Boolean = {
+    def isMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean= {
         require(tows != null)
 
         if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
-            tows.zip(this).sortBy(p => getSort(p._2.kind)).forall { case (tow, chunk) => isMatch(tow, chunk, req) }
+            tows.zip(this).sortBy(p => getSort(p._2.kind)).forall {
+                case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
+            }
         else
             false
     }
-    
+
     /**
       *
       * @param toks
@@ -214,15 +234,16 @@ class NCProbeSynonym(
       *
       * @param tows
       * @param req
+      * @param variantsToks
       */
-    def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest): Option[Seq[NCIdlContent]] = {
+    def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Option[Seq[NCIdlContent]] = {
         require(tows != null)
         require(req != null)
         require(hasIdl)
 
         sparseMatch0(
             tows,
-            (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req),
+            (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
             (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
             shouldBeNeighbors = !sparse
         )
@@ -340,9 +361,17 @@ object NCProbeSynonym {
         permute: Boolean
     ): NCProbeSynonym = {
         val syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value, sparse, permute)
-        
+
         syn ++= chunks
-        
+
         syn
     }
+
+    /**
+      *
+      * @param req
+      * @param tok
+      * @param idlPred
+      */
+    def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = NCSentenceManager.saveIdl(req, tok, idlPred)
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index bcf2c9c..39f6969 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,6 +22,8 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence => NlpSentence, NCNlpSenten
 import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
 import org.apache.nlpcraft.model.NCVariant
 import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger, NCVariantImpl}
+import org.apache.nlpcraft.model.intent.NCIdlContext
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 
 import java.io.{Serializable => JSerializable}
 import java.util
@@ -267,6 +269,34 @@ object NCProbeVariants {
                 for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
                     process(tok, tokNlp)
 
+                if (ok) {
+                    NCSentenceManager.getIdlData(srvReqId) match {
+                        case Some((req, toksData)) =>
+                            ok =
+                                toks.forall(t =>
+                                    toksData.get((t, t.getId)) match {
+                                        case Some(f) =>
+                                            val x =
+                                            f.apply(
+                                                t,
+                                                NCIdlContext(req = req, toks = toks.toSeq)
+                                            ).value.asInstanceOf[Boolean]
+
+
+                                            if (!x)
+                                                println("x="+x + ", t=" + t  + ", toks=" + toks)
+                                            x
+
+
+                                        case None => true
+                                    }
+                                )
+
+                        case None =>  // No-op.
+
+                    }
+                }
+
                 if (ok) Some(new NCVariantImpl(toks.asJava)) else None
             })
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 4b6c697..9af0c61 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -526,8 +526,6 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
             )
         })
 
-        NCSentenceManager.clearCache(srvReqId)
-
         // Final validation before execution.
         try
             sensSeq.foreach(NCValidateManager.postValidate(mdl, _, span))
@@ -556,6 +554,8 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
 
         var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq, lastPhase = true)
 
+        NCSentenceManager.clearCache(srvReqId)
+
         // Sentence variants can be filtered by model.
         val fltSenVars: Seq[(NCVariant, Int)] =
             senVars.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 9706c4c..7a11806 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -28,7 +28,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel,  NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
 
 import java.io.Serializable
 import java.util.{List => JList}
@@ -526,8 +526,21 @@ object NCModelEnricher extends NCProbeEnricher {
             "enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text
         ) { span =>
             val req = NCRequestImpl(senMeta, ns.srvReqId)
+
             val combToks = combosTokens(ns.toSeq)
             lazy val ch = mkComplexes(mdl, ns)
+            lazy val variantsToks =
+                ch.complexes.map(p => p.tokensComplexes.map(p =>
+                    if (p.isToken)
+                        p.token
+                    else {
+                        // TODO: everywhere
+                        val clone = p.word.clone()
+
+                        clone.filter(!_.isNlp).foreach(clone.remove)
+
+                        NCTokenImpl(mdl, ns.srvReqId, clone)
+                    }))
 
             def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
                 startScopedSpan(
@@ -603,6 +616,7 @@ object NCModelEnricher extends NCProbeEnricher {
                             val allSyns = get(mdl.idlSynonyms, eId)
                             lazy val allCombs = mkCombinations(ch, toks, idlCache)
 
+
                             // 2.1 Continuous.
                             if (!mdl.hasSparseSynonyms) {
                                 var found = false
@@ -613,7 +627,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     if !found;
                                     data = comb.map(_.data)
                                 )
-                                    if (s.isMatch(data, req)) {
+                                    if (s.isMatch(data, req, variantsToks)) {
                                         val parts = toParts(mdl, ns.srvReqId, data, s)
 
                                         add("IDL continuous", ns, contCache, eId, greedy, toksExt, idxs, s, parts)
@@ -629,7 +643,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     s <- allSyns;
                                     comb <- allCombs
                                 )
-                                    s.sparseMatch(comb.map(_.data), req) match {
+                                    s.sparseMatch(comb.map(_.data), req, variantsToks) match {
                                         case Some(res) =>
                                             val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCSentenceCache.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCSentenceCache.scala
new file mode 100644
index 0000000..e5b6e3e
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCSentenceCache.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
+
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonym => Synonym}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.{NCIdlContent => IdlToken}
+import org.apache.nlpcraft.model.NCRequest
+import scala.collection.mutable
+
+class NCSentenceCache {
+//    case class Key(elemId: String, indexes: Seq[Int])
+//    case class Value[T](synonym: Synonym, result: Seq[T])
+//
+//    val cacheToks = mutable.HashMap.empty[Key, mutable.HashMap[Seq[Int], Value[NlpToken]]]
+//    val cacheIdl = mutable.HashMap.empty[Key, mutable.HashMap[Seq[Int], Value[IdlToken]]]
+//
+//    var cacheHits = 0
+//    var cacheCnt = 0
+//    var time = 0L
+//
+//    private def process[T](
+//        elemId: String,
+//        elemSyns: Seq[Synonym],
+//        toks: Seq[T],
+//        extract: (Synonym, Seq[T]) => Option[Seq[T]],
+//        cache: mutable.Map[Key, mutable.HashMap[Seq[Int], Value[T]]],
+//        getIndex: T => Int,
+//        callback: (Synonym, Seq[T]) => Unit
+//    ): Unit = {
+//        val t = System.currentTimeMillis()
+//
+//        val hash = toks.map(getIndex)
+//        val key = Key(elemId, hash)
+//
+//        cacheCnt += 1
+//
+//        cache.get(key) match {
+//            case Some(data) =>
+//                cacheHits += 1
+//                data.get(hash) match {
+//                    case Some(v) => callback(v.synonym, v.result)
+//                    case None => // No-op.
+//                }
+//            case None =>
+//                // mutable.HashMap.empty[Key[IdlToken], Map[Seq[IdlToken], Value[IdlToken]]]
+//                val hit = mutable.HashMap.empty[Seq[Int], Value[T]]
+//
+//                for (s <- elemSyns)
+//                    extract(s, toks) match {
+//                        case Some(res) =>
+//                            callback(s, res)
+//                            hit += hash -> Value(s, res)
+//                        case None => // No-op.
+//                    }
+//
+//                cache += key -> hit
+//        }
+//
+//        time += (System.currentTimeMillis() - t)
+//    }
+//
+//    def processSparseTokens(
+//        elemId: String,
+//        elemSyns: Seq[Synonym],
+//        toks: Seq[NlpToken],
+//        callback: (Synonym, Seq[NlpToken]) => Unit
+//    ): Unit =
+//        process(
+//            elemId,
+//            elemSyns,
+//            toks,
+//            (s: Synonym, toks: Seq[NlpToken]) => s.sparseMatch(toks),
+//            cacheToks,
+//            (t: NlpToken) => t.index,
+//            callback
+//        )
+//
+//    def processSparseIdl(
+//        elemId: String,
+//        req: NCRequest,
+//        elemSyns: Seq[Synonym],
+//        toks: Seq[IdlToken],
+//        callback: (Synonym, Seq[IdlToken]) => Unit
+//    ): Unit =
+//        process(
+//            elemId,
+//            elemSyns,
+//            toks,
+//            (s: Synonym, toks: Seq[IdlToken]) => s.sparseMatch(toks, req),
+//            cacheIdl,
+//            (t: IdlToken) => if (t.isRight) t.toOption.get.index else t.swap.toOption.get.getIndex,
+//            callback
+//        )
+}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index ee8b719..b0a077a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -22,7 +22,8 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
 import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
 import org.apache.nlpcraft.common.{NCE, NCService, U, _}
-import org.apache.nlpcraft.model.NCModel
+import org.apache.nlpcraft.model.intent.NCIdlFunction
+import org.apache.nlpcraft.model.{NCModel, NCRequest, NCToken}
 import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey
 
 import java.io.{Serializable => JSerializable}
@@ -43,6 +44,9 @@ object NCSentenceManager extends NCService {
     type CacheValue = Seq[Seq[NCNlpSentenceNote]]
     private val combCache = mutable.HashMap.empty[String, mutable.HashMap[CacheKey, CacheValue]]
 
+    type IdlCacheKey = (NCToken, String)
+    private val reqCache = mutable.HashMap.empty[String, NCRequest]
+    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[IdlCacheKey, NCIdlFunction]]
 
     /**
       *
@@ -818,5 +822,37 @@ object NCSentenceManager extends NCService {
       *
       * @param srvReqId
       */
-    def clearCache(srvReqId: String): Unit = combCache -= srvReqId
+    def clearCache(srvReqId: String): Unit = {
+        combCache -= srvReqId
+        reqCache -= srvReqId
+        idlCache -= srvReqId
+    }
+
+    def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = {
+        val srvReqId = req.getServerRequestId
+
+        reqCache += srvReqId -> req
+
+        val idlCacheReq: mutable.Map[IdlCacheKey, NCIdlFunction] =
+            idlCache.get(srvReqId) match {
+                case Some(m) => m
+                case None =>
+                    val m  = mutable.HashMap.empty[IdlCacheKey, NCIdlFunction]
+
+                    idlCache += srvReqId -> m
+
+                    m
+            }
+
+        idlCacheReq += (tok, tok.getId) -> idlPred
+    }
+
+    def getIdlData(srvReqId: String) : Option[(NCRequest, Map[IdlCacheKey, NCIdlFunction])] = {
+        val reqData = reqCache.get(srvReqId)
+        val idlData = idlCache.get(srvReqId)
+
+        require(reqData.isDefined && idlData.isDefined || reqData.isEmpty && idlData.isEmpty)
+
+        if (reqData.isDefined) Some((reqData.get, idlData.get.toMap)) else None
+    }
 }
\ No newline at end of file