You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/25 13:40:02 UTC

[incubator-nlpcraft] branch NLPCRAFT-253 created (now 341f57a)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 341f57a  WIP.

This branch includes the following new commits:

     new 341f57a  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 341f57a5761eb0f8c062e2f64cf58340ee1acb79
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Feb 25 16:39:48 2021 +0300

    WIP.
---
 .../test/impl/NCTestAutoModelValidatorImpl.scala   | 18 ++++++---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |  2 +-
 ...obeModel.scala => NCProbeSynonymsWrapper.scala} | 47 +++++++++++-----------
 .../probe/mgrs/deploy/NCDeployManager.scala        |  4 +-
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  2 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 31 +++++++++++---
 6 files changed, 64 insertions(+), 40 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
index 30dca04..36a9cca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/impl/NCTestAutoModelValidatorImpl.scala
@@ -66,7 +66,8 @@ private [test] object NCTestAutoModelValidatorImpl extends LazyLogging {
             intentId: String,
             text: String,
             pass: Boolean,
-            error: Option[String]
+            error: Option[String],
+            time: Long
         )
 
         val results = samples.flatMap { case (mdlId, samples) ⇒
@@ -77,14 +78,18 @@ private [test] object NCTestAutoModelValidatorImpl extends LazyLogging {
                     cli.open(mdlId)
 
                     txts.map (txt ⇒ {
+                        var t = System.currentTimeMillis()
+
                         val res = cli.ask(txt)
 
+                        t = System.currentTimeMillis() - t
+
                         if (res.isFailed)
-                            Result(mdlId, intentId, txt, pass = false, Some(res.getResultError.get()))
+                            Result(mdlId, intentId, txt, pass = false, Some(res.getResultError.get()), t)
                         else if (intentId != res.getIntentId)
-                            Result(mdlId, intentId, txt, pass = false, Some(s"Unexpected intent ID '${res.getIntentId}'"))
+                            Result(mdlId, intentId, txt, pass = false, Some(s"Unexpected intent ID '${res.getIntentId}'"), t)
                         else
-                            Result(mdlId, intentId, txt, pass = true, None)
+                            Result(mdlId, intentId, txt, pass = true, None, t)
                     })
                 }
                 finally
@@ -96,7 +101,7 @@ private [test] object NCTestAutoModelValidatorImpl extends LazyLogging {
 
         val tbl = NCAsciiTable()
 
-        tbl #= ("Model ID", "Intent ID", "+/-", "Text", "Error")
+        tbl #= ("Model ID", "Intent ID", "+/-", "Text", "Error", "Execution time")
 
         for (res ← results)
             tbl += (
@@ -104,7 +109,8 @@ private [test] object NCTestAutoModelValidatorImpl extends LazyLogging {
                 res.intentId,
                 if (res.pass) g("OK") else r("FAIL"),
                 res.text,
-                res.error.getOrElse("")
+                res.error.getOrElse(""),
+                res.time
             )
         
         val passCnt = results.count(_.pass)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index fae4496..acc2021 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -39,7 +39,7 @@ case class NCProbeModel(
     model: NCModel,
     solver: NCIntentSolver,
     intents: Seq[NCDslIntent],
-    synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
+    synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
     synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
     addStopWordsStems: Set[String],
     exclStopWordsStems: Set[String],
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
similarity index 50%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
index fae4496..22520be 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
@@ -17,33 +17,32 @@
 
 package org.apache.nlpcraft.probe.mgrs
 
-import org.apache.nlpcraft.model.intent.impl.NCIntentSolver
-import org.apache.nlpcraft.model.intent.utils.NCDslIntent
-import org.apache.nlpcraft.model.{NCElement, NCModel}
-
 import scala.collection.{Map, Seq}
 
 /**
+  * Synonyms sequence holder with optimized access if all synonyms have `text` type.
   *
-  * @param model
-  * @param solver
-  * @param intents
+  * @param isTextOnly
   * @param synonyms
-  * @param synonymsDsl
-  * @param addStopWordsStems
-  * @param exclStopWordsStems
-  * @param suspWordsStems
-  * @param elements
+  * @param synonymsByStems
   */
-case class NCProbeModel(
-    model: NCModel,
-    solver: NCIntentSolver,
-    intents: Seq[NCDslIntent],
-    synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
-    synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
-    addStopWordsStems: Set[String],
-    exclStopWordsStems: Set[String],
-    suspWordsStems: Set[String],
-    elements: Map[String /*Element ID*/ , NCElement],
-    samples: Map[String, Seq[Seq[String]]]
-)
+class NCProbeSynonymsWrapper(
+    val isTextOnly: Boolean, // Companion `apply` sets this to true when every synonym reports `isTextOnly`.
+    val synonyms: Seq[NCProbeSynonym], // Sequence storage; companion `apply` fills it only when `isTextOnly` is false.
+    val synonymsByStems: Map[String, NCProbeSynonym] // Storage keyed by synonym `stems`; companion `apply` fills it only when `isTextOnly` is true.
+) {
+    require(synonyms.isEmpty || synonymsByStems.isEmpty) // Invariant: at most one of the two storages is non-empty.
+
+    val count: Int = synonyms.size + synonymsByStems.size // Number of held synonyms; by the invariant above at most one term is non-zero.
+}
+
+object NCProbeSynonymsWrapper {
+    def apply(syns: Seq[NCProbeSynonym]): NCProbeSynonymsWrapper = { // Factory: picks the storage based on the kinds of the given synonyms.
+        val isTextOnly = syns.forall(_.isTextOnly) // Also true for an empty `syns` (`forall` on an empty sequence).
+
+        if (isTextOnly)
+            new NCProbeSynonymsWrapper(isTextOnly, Seq.empty, syns.map(s ⇒ s.stems → s).toMap) // Synonyms with equal `stems` collapse into one entry (the last one wins), so `count` may be less than `syns.size`.
+        else
+            new NCProbeSynonymsWrapper(isTextOnly, syns, Map.empty) // At least one synonym is not text-only: keep the sequence as is, no index by stems.
+    }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 8d18804..ee8e464 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -36,7 +36,7 @@ import org.apache.nlpcraft.model.factories.basic.NCBasicModelFactory
 import org.apache.nlpcraft.model.intent.impl.{NCIntentDslCompiler, NCIntentSolver}
 import org.apache.nlpcraft.model.intent.utils.NCDslIntent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{DSL, REGEX, TEXT}
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeSynonymChunk}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeSynonymChunk, NCProbeSynonymsWrapper}
 import org.apache.nlpcraft.probe.mgrs.model.NCModelSynonymDslCompiler
 import resource.managed
 
@@ -502,7 +502,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
             model = mdl,
             solver = solver,
             intents = intents.keySet.toSeq,
-            synonyms = mkFastAccessMap(filter(syns, dsl = false)),
+            synonyms = mkFastAccessMap(filter(syns, dsl = false)).map(p ⇒ p._1 → p._2.map(p ⇒ p._1 → NCProbeSynonymsWrapper(p._2))),
             synonymsDsl = mkFastAccessMap(filter(syns, dsl = true)),
             addStopWordsStems = addStopWords.toSet,
             exclStopWordsStems = exclStopWords.toSet,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 43f108d..1cc0fc9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -59,7 +59,7 @@ object NCModelManager extends NCService with DecorateAsScala {
             data.values.foreach(w ⇒ {
                 val mdl = w.model
 
-                val synCnt = w.synonyms.values.flatMap(_.values).flatten.size
+                val synCnt = w.synonyms.flatMap(_._2.map(_._2.count)).sum
                 val elmCnt = w.elements.keySet.size
                 val intentCnt = w.intents.size
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index bea4eaa..9d7b05e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -25,7 +25,7 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuf
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeSynonymsWrapper, NCProbeVariants}
 
 import scala.collection.JavaConverters._
 import scala.compat.java8.OptionConverters._
@@ -336,7 +336,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
               * @param len
               * @return
               */
-            def fastAccess(
+            def fastAccessDsl(
                 fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]],
                 elmId: String,
                 len: Int): Seq[NCProbeSynonym] =
@@ -345,6 +345,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     case None ⇒ Seq.empty[NCProbeSynonym]
                 }
 
+            def fastAccess( // Looks up the synonyms wrapper by element ID and synonym length; `None` if either key is absent.
+                fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]],
+                elmId: String,
+                len: Int): Option[NCProbeSynonymsWrapper] =
+                fastMap.get(elmId) match {
+                    case Some(m) ⇒ m.get(len) // Element ID is present: wrapper registered for this length, if any.
+                    case None ⇒ None // Element ID is absent from the map.
+                }
+
             /**
               *
               * @param toks
@@ -388,9 +397,19 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                             // Optimization - plain synonyms can be used only on first iteration
                             if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
-                                for (syn ← fastAccess(mdl.synonyms, elm.getId, toks.length) if !found)
-                                    if (syn.isMatch(toks))
-                                        addMatch(elm, toks, syn, Seq.empty)
+                                fastAccess(mdl.synonyms, elm.getId, toks.length) match {
+                                    case Some(h) ⇒
+                                        if (h.isTextOnly)
+                                            h.synonymsByStems.get(toks.map(_.stem).mkString(" ")) match {
+                                                case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
+                                                case None ⇒ // No-op.
+                                            }
+                                        else
+                                            for (syn ← h.synonyms if !found)
+                                                if (syn.isMatch(toks))
+                                                    addMatch(elm, toks, syn, Seq.empty)
+                                    case None ⇒ // No-op.
+                                }
 
                             if (mdl.synonymsDsl.nonEmpty) {
                                 found = false
@@ -403,7 +422,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 if (seq == null)
                                     seq = convert(ns, collapsedSens, toks)
 
-                                for (comb ← seq; syn ← fastAccess(mdl.synonymsDsl, elm.getId, comb.length) if !found)
+                                for (comb ← seq; syn ← fastAccessDsl(mdl.synonymsDsl, elm.getId, comb.length) if !found)
                                     if (syn.isMatch(comb.map(_.data)))
                                         addMatch(elm, toks, syn, comb.filter(_.isToken).map(_.token))
                             }