You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/16 11:20:44 UTC

[incubator-nlpcraft] branch NLPCRAFT-238 created (now c13801c)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-238
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at c13801c  WIP.

This branch includes the following new commits:

     new c13801c  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-238
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit c13801cf9f4af2b05f4e716bd0d56ec2401c1dc4
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Feb 16 14:20:28 2021 +0300

    WIP.
---
 .../scala/org/apache/nlpcraft/model/NCElement.java |  10 ++
 .../apache/nlpcraft/model/NCModelFileAdapter.java  |  21 +++-
 .../scala/org/apache/nlpcraft/model/NCToken.java   |   2 +-
 .../nlpcraft/model/impl/json/NCElementJson.java    |  19 +++-
 .../probe/mgrs/deploy/NCDeployManager.scala        |  37 +++++--
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 111 ++++++++++++---------
 .../model/conversation/NCConversationSpec.scala    |   9 +-
 .../conversation/NCConversationTimeoutSpec.scala   |   8 +-
 .../mgrs/nlp/enrichers/NCDefaultTestModel.scala    |   1 -
 .../nlpcraft/server/rest/NCRestModelSpec.scala     |   4 +-
 10 files changed, 144 insertions(+), 78 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 9ff5bf7..c0f75c8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -327,4 +327,14 @@ public interface NCElement extends NCMetadata, Serializable {
     default NCValueLoader getValueLoader() {
         return null;
     }
+
+    // TODO:
+    default Optional<Boolean> isPermutateSynonyms() {
+        return Optional.empty();
+    }
+
+    // TODO:
+    default Optional<Integer> getJiggleFactor() {
+        return Optional.empty();
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index 883dfac..d3d4d16 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -116,7 +116,7 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
         this.abstractToks = convert(proxy.getAbstractTokens(), Collections.emptySet());
         this.addStopwords = convert(proxy.getAdditionalStopWords(), null);
         this.exclStopwords = convert(proxy.getExcludedStopWords(), null);
-        this.elems = convertElements(proxy.getElements());
+        this.elems = convertElements(proxy, proxy.getElements());
         this.macros = convertMacros(proxy.getMacros());
         this.metadata = convertMeta(proxy.getMetadata());
         this.intents = convert(proxy.getIntents(), null);
@@ -140,7 +140,7 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
         
         if (in == null)
             try {
-                in = new FileInputStream(new File(filePath));
+                in = new FileInputStream(filePath);
             }
             catch (FileNotFoundException e) {
                 // Ignore.
@@ -245,10 +245,11 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
 
     /**
      *
+     * @param proxy
      * @param arr
      * @return
      */
-    private static Set<NCElement> convertElements(NCElementJson[] arr) {
+    private static Set<NCElement> convertElements(NCModelJson proxy, NCElementJson[] arr) {
         if (arr == null)
             return Collections.emptySet();
 
@@ -326,6 +327,20 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
                                 loaders.computeIfAbsent(js.getValueLoader(), this::mkLoader) :
                                 null;
                         }
+
+                        @Override
+                        public Optional<Boolean> isPermutateSynonyms() {
+                            return nvl(js.isPermutateSynonyms(), proxy.isPermutateSynonyms());
+                        }
+
+                        @Override
+                        public Optional<Integer> getJiggleFactor() {
+                            return nvl(js.getJiggleFactor(), proxy.getJiggleFactor());
+                        }
+
+                        private<T> Optional<T> nvl(T t, T dflt) {
+                            return Optional.of(t != null ? t : dflt);
+                        }
                     };
             }).collect(Collectors.toSet());
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index fe7a442..345a2ad 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -309,7 +309,7 @@ public interface NCToken extends NCMetadata {
         String id = getId();
         int i = id.indexOf(':');
 
-        return i <=0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
+        return i <= 0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
     }
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index ab8c1e8..7419938 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -17,7 +17,8 @@
 
 package org.apache.nlpcraft.model.impl.json;
 
-import java.util.*;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Parsing bean.
@@ -31,6 +32,10 @@ public class NCElementJson {
     private Map<String, Object> metadata = new HashMap<>();
     private NCValueJson[] values = new NCValueJson[0];
     private String valueLoader;
+    // Can be null.
+    private Boolean isPermutateSynonyms;
+    // Can be null.
+    private Integer jiggleFactor;
 
     public String getParentId() {
         return parentId;
@@ -80,4 +85,16 @@ public class NCElementJson {
     public void setValueLoader(String valueLoader) {
         this.valueLoader = valueLoader;
     }
+    public Boolean isPermutateSynonyms() {
+        return isPermutateSynonyms;
+    }
+    public void setPermutateSynonyms(Boolean permutateSynonyms) {
+        isPermutateSynonyms = permutateSynonyms;
+    }
+    public Integer getJiggleFactor() {
+        return jiggleFactor;
+    }
+    public void setJiggleFactor(Integer jiggleFactor) {
+        this.jiggleFactor = jiggleFactor;
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 0c51051..8a638ae 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -41,6 +41,7 @@ import org.apache.nlpcraft.probe.mgrs.model.NCModelSynonymDslCompiler
 import resource.managed
 
 import scala.collection.JavaConverters._
+import scala.compat.java8.OptionConverters._
 import scala.collection.convert.DecorateAsScala
 import scala.collection.{Map, Seq, Set, mutable}
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}
@@ -140,13 +141,32 @@ object NCDeployManager extends NCService with DecorateAsScala {
 
         val mdlId = mdl.getId
 
-        for (elm ← mdl.getElements.asScala)
+        for (elm ← mdl.getElements.asScala) {
             if (!elm.getId.matches(ID_REGEX))
-                throw new NCE(s"Model element ID does not match regex [" +
-                    s"mdlId=$mdlId, " +
-                    s"elmId=${elm.getId}, " +
-                    s"regex=$ID_REGEX" +
-                s"]")
+                throw new NCE(
+                s"Model element ID does not match regex [" +
+                s"mdlId=$mdlId, " +
+                s"elmId=${elm.getId}, " +
+                s"regex=$ID_REGEX" +
+                s"]"
+            )
+
+            elm.getJiggleFactor.asScala match {
+                case Some(elemJiggleFactor) ⇒
+                    // TODO:
+                    if (elemJiggleFactor < 0 || elemJiggleFactor > mdl.getJiggleFactor)
+                        throw new NCE(
+                            s"Model element has incorrect jiggle factor value, it should be positive 0 and less or equal to model value [" +
+                                s"mdlId=$mdlId, " +
+                                s"elmId=${elm.getId}, " +
+                                s"mdlJiggleFactor=${mdl.getJiggleFactor}, " +
+                                s"elmJiggleFactor=$elemJiggleFactor" +
+                                s"]"
+                        )
+
+                case None ⇒ // No-op.
+            }
+        }
 
         checkMacros(mdl)
 
@@ -234,7 +254,10 @@ object NCDeployManager extends NCService with DecorateAsScala {
                         )
                 }
 
-                if (mdl.isPermutateSynonyms && !isElementId && chunks.forall(_.wordStem != null))
+                if (
+                    elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms) &&
+                    !isElementId && chunks.forall(_.wordStem != null)
+                )
                     simplePermute(chunks).map(p ⇒ p.map(_.wordStem) → p).toMap.values.foreach(p ⇒ add(p, p == chunks))
                 else
                     add(chunks, isDirect = true)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index fe1cac5..96714a8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,10 +19,9 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
 import java.io.Serializable
 import java.util
-
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuffer, _}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
@@ -100,6 +99,10 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         }
     }
 
+    case class BufferHolder(iterator: Iterator[NCNlpSentenceTokenBuffer], jiggleFactor: Int) {
+        def foreach(f: (NCNlpSentenceTokenBuffer, Int) ⇒ Unit): Unit = iterator.foreach(f(_, jiggleFactor))
+    }
+
     /**
      *
      * @param parent Optional parent span.
@@ -127,55 +130,61 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param factor Distance of left or right jiggle, i.e. how far can an individual token move
       *         left or right in the sentence.
       */
-    private def jiggle(ns: NCNlpSentenceTokenBuffer, factor: Int): Iterator[NCNlpSentenceTokenBuffer] = {
+    private def jiggle(ns: NCNlpSentenceTokenBuffer, factor: Int): BufferHolder = {
         require(factor >= 0)
-        if (ns.isEmpty)
-            Iterator.empty
-        else if (factor == 0)
-            Iterator.apply(ns)
-        else
-            new Iterator[NCNlpSentenceTokenBuffer] {
-                private val min = -factor
-
-                private val max = factor
-
-                private val sz = ns.size
-
-                private var i = 0 // Token index.
-                private var d = 0 // Jiggle amount [min, max].
-
-                private var isNext = sz > 0
-
-                private def calcNext(): Unit = {
-                    isNext = false
-                    d += 1
-                    while (i < sz && !isNext) {
-                        while (d <= max && !isNext) {
-                            val p = i + d
-                            if (p >= 0 && p < sz) // Valid new position?
-                                isNext = true
-                            else
-                                d += 1
-                        }
-                        if (!isNext) {
-                            d = min
-                            i += 1
+
+        val buf =
+            if (ns.isEmpty)
+                Iterator.empty
+            else if (factor == 0)
+                Iterator.apply(ns)
+            else
+                new Iterator[NCNlpSentenceTokenBuffer] {
+                    private val min = -factor
+                    private val max = factor
+                    private val sz = ns.size
+
+                    private var i = 0 // Token index.
+                    private var d = 0 // Jiggle amount [min, max].
+                    private var isNext = sz > 0
+
+                    private def calcNext(): Unit = {
+                        isNext = false
+                        d += 1
+
+                        while (i < sz && !isNext) {
+                            while (d <= max && !isNext) {
+                                val p = i + d
+
+                                if (p >= 0 && p < sz) // Valid new position?
+                                    isNext = true
+                                else
+                                    d += 1
+                            }
+                            if (!isNext) {
+                                d = min
+                                i += 1
+                            }
                         }
                     }
-                }
 
-                override def hasNext: Boolean = isNext
+                    override def hasNext: Boolean = isNext
+
+                    override def next(): NCNlpSentenceTokenBuffer = {
+                        require(isNext)
+
+                        val buf = NCNlpSentenceTokenBuffer(ns)
+
+                        if (d != 0)
+                            buf.insert(i + d, buf.remove(i)) // Jiggle.
 
-                override def next(): NCNlpSentenceTokenBuffer = {
-                    require(isNext)
-                    val buf = NCNlpSentenceTokenBuffer(ns)
-                    if (d != 0)
-                        buf.insert(i + d, buf.remove(i)) // Jiggle.
+                        calcNext()
 
-                    calcNext()
-                    buf
+                        buf
+                    }
                 }
-            }
+
+        BufferHolder(buf, factor)
     }
 
     /**
@@ -318,7 +327,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
             "txt" → ns.text) { span ⇒
-            val jiggleFactor = mdl.model.getJiggleFactor
+            val mdlJiggleFactor = mdl.model.getJiggleFactor
             val cache = mutable.HashSet.empty[Seq[Int]]
             val matches = ArrayBuffer.empty[ElementMatch]
 
@@ -354,8 +363,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             /**
               *
               * @param perm Permutation to process.
+              * @param mdlJiggleFactor Model jiggle factor.
               */
-            def procPerm(perm: NCNlpSentenceTokenBuffer): Unit = {
+            def procPerm(perm: NCNlpSentenceTokenBuffer, mdlJiggleFactor: Int): Unit = {
                 permCnt += 1
 
                 for (toks ← combos(perm)) {
@@ -371,7 +381,10 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             def addMatch(
                                 elm: NCElement, toks: Seq[NCNlpSentenceToken], syn: NCProbeSynonym, parts: Seq[NCToken]
                             ): Unit =
-                                if (!matches.exists(m ⇒ m.element == elm && m.isSubSet(toks.toSet))) {
+                                if (
+                                    (elm.getJiggleFactor.isEmpty || elm.getJiggleFactor.get() <= mdlJiggleFactor) &&
+                                    !matches.exists(m ⇒ m.element == elm && m.isSubSet(toks.toSet))
+                                ) {
                                     found = true
 
                                     matches += ElementMatch(elm, toks, syn, parts)
@@ -410,8 +423,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 "mdlId" → mdl.model.getId,
                 "txt" → ns.text) { _ ⇒
                 // Iterate over depth-limited permutations of the original sentence with and without stopwords.
-                jiggle(ns, jiggleFactor).foreach(procPerm)
-                jiggle(NCNlpSentenceTokenBuffer(ns.filter(!_.isStopWord)), jiggleFactor).foreach(procPerm)
+                jiggle(ns, mdlJiggleFactor).foreach(procPerm)
+                jiggle(NCNlpSentenceTokenBuffer(ns.filter(!_.isStopWord)), mdlJiggleFactor).foreach(procPerm)
             }
 
             if (DEEP_DEBUG)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationSpec.scala
index b3837da..fff7ead 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationSpec.scala
@@ -17,14 +17,12 @@
 
 package org.apache.nlpcraft.model.conversation
 
-import java.util
-import java.util.Collections
-
 import org.apache.nlpcraft.model.{NCElement, NCIntent, NCModel, NCResult}
 import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
 import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue}
 import org.junit.jupiter.api.Test
 
+import java.util
 import scala.collection.JavaConverters._
 
 /**
@@ -35,10 +33,7 @@ class NCConversationSpecModel extends NCModel {
     override def getName: String = this.getClass.getSimpleName
     override def getVersion: String = "1.0.0"
 
-    private def mkElement(id: String): NCElement = new NCElement {
-        override def getId: String = id
-        override def getSynonyms: util.List[String] = Collections.singletonList(id)
-    }
+    private def mkElement(id: String): NCElement = new NCElement { override def getId: String = id }
 
     override def getElements: util.Set[NCElement] = Set(mkElement("test1"), mkElement("test2")).asJava
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationTimeoutSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationTimeoutSpec.scala
index 6c6869e..4c5c181 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationTimeoutSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/conversation/NCConversationTimeoutSpec.scala
@@ -46,13 +46,7 @@ class NCTimeoutSpecModel extends NCModel {
     override def getConversationTimeout: Long = TIMEOUT
 
     override def getElements: util.Set[NCElement] =
-        Collections.singleton(
-            new NCElement {
-                override def getId: String = "test"
-
-                override def getSynonyms: util.List[String] = Collections.singletonList("test")
-            }
-        )
+        Collections.singleton(new NCElement { override def getId: String = "test" })
 
     @NCIntent("intent=req term~{id == 'test'}")
     def onMatch(ctx: NCIntentMatch): NCResult = {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
index fe03c64..15f9ed1 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
@@ -53,7 +53,6 @@ class NCDefaultTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0
     private def mkValueElement(id: String, vals: String*): NCElement =
         new NCElement {
             override def getId: String = id
-            override def getSynonyms: util.List[String] = Collections.singletonList(id)
             override def getValues: util.List[NCValue] = vals.map(v ⇒ new NCValue {
                 override def getName: String = v
                 override def getSynonyms: util.List[String] = Collections.singletonList(v)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 5c337ce..2327b0d 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -20,7 +20,7 @@ package org.apache.nlpcraft.server.rest
 import org.apache.nlpcraft.NCTestEnvironment
 import org.apache.nlpcraft.examples.alarm.AlarmModel
 import org.junit.jupiter.api.Assertions._
-import org.junit.jupiter.api.{Disabled, Test}
+import org.junit.jupiter.api.Test
 
 import scala.collection.JavaConverters._
 
@@ -37,7 +37,7 @@ class NCRestModelSpec extends NCRestSpec {
         // Note that checked values are valid for current configuration of `nlpcraft.alarm.ex` model.
         post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex")(
             ("$.status", (status: String) ⇒ assertEquals("API_OK", status)),
-            ("$.result.suggestions[:1].x:alarm.*", (data: java.util.List[java.util.Map[String, Object]]) ⇒ {
+              ("$.result.suggestions[:1].x:alarm.*", (data: java.util.List[java.util.Map[String, Object]]) ⇒ {
                 val scores = extract(data)
 
                 assertTrue(scores.nonEmpty)