You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/29 11:52:25 UTC

[incubator-nlpcraft] branch NLPCRAFT-460 created (now 7e2854b)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-460
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 7e2854b  WIP.

This branch includes the following new commits:

     new 7e2854b  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Any revisions
listed as "add" were already present in the repository and have only
been added to this reference.

[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-460
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 7e2854be3fde3b24c81945a7dc751d8b7b02f098
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Wed Sep 29 14:51:50 2021 +0300

    WIP.
---
 .../server/sugsyn/NCSuggestSynonymManager.scala    | 92 ++++++++++++----------
 .../nlpcraft/server/rest/NCRestModelSpec.scala     | 31 +++++++-
 .../nlpcraft/server/rest/RestTestModel.scala       |  6 +-
 3 files changed, 81 insertions(+), 48 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index d89ba98..91b195d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -42,8 +42,8 @@ import scala.jdk.CollectionConverters._
 import scala.util.{Failure, Success}
 
 /**
- * Synonym suggestion manager.
- */
+  * Synonym suggestion manager.
+  */
 object NCSuggestSynonymManager extends NCService {
     // For context word server requests.
     private final val MAX_LIMIT: Int = 10000
@@ -82,40 +82,47 @@ object NCSuggestSynonymManager extends NCService {
 
                 case _ =>
                     throw new NCE(
-                    s"Unexpected HTTP response from `ctxword` server [" +
-                    s"code=$code, " +
-                    s"response=$js" +
-                    s"]"
-                )
+                        s"Unexpected HTTP response from `ctxword` server [" +
+                            s"code=$code, " +
+                            s"response=$js" +
+                            s"]"
+                    )
             }
         }
 
     case class Suggestion(word: String, score: Double)
     case class RequestData(sentence: String, ex: String, elmId: String, index: Int)
-    case class RestRequestSentence(text: String, indexes: util.List[Int])
+    case class RestRequestSentence(text: String, indexes: util.List[Int]) {
+        validate(text, indexes.asScala)
+
+        private def validate(text: String, indexes: Seq[Int]): Unit = {
+            val arr = splitAndNormalize(text)
+
+            require(
+                indexes.forall(i => i >= 0 && i < arr.length),
+                s"Invalid request [text=$text, indexes=${indexes.mkString(",")}"
+            )
+        }
+    }
     case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double)
     case class Word(word: String, stem: String) {
         require(!word.contains(" "), s"Word cannot contains spaces: $word")
-        require(
-            word.forall(ch =>
-                ch.isLetterOrDigit ||
-                ch == '\'' ||
-                SEPARATORS.contains(ch)
-            ),
-            s"Unsupported symbols: $word"
-        )
+        require(isSuitable4Suggestion(word), s"Unsupported symbols: $word")
     }
     case class SuggestionResult(synonym: String, score: Double)
 
     private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ")
     private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
     private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
+    private def splitAndNormalize(s: String) = s.split(" ").map(_.strip).filter(_.nonEmpty)
+    private def isSuitable4Suggestion(word: String): Boolean =
+        word.forall(ch => ch.isLetterOrDigit || ch == '\'' || SEPARATORS.contains(ch))
 
     /**
-     *
-     * @param seq1
-     * @param seq2
-     */
+      *
+      * @param seq1
+      * @param seq2
+      */
     private def getAllSlices(seq1: Seq[String], seq2: Seq[String]): Seq[Int] = {
         val seq = mutable.Buffer.empty[Int]
 
@@ -131,12 +138,12 @@ object NCSuggestSynonymManager extends NCService {
     }
 
     /**
-     *
-     * @param mdlId
-     * @param minScoreOpt
-     * @param parent
-     * @return
-     */
+      *
+      * @param mdlId
+      * @param minScoreOpt
+      * @param parent
+      * @return
+      */
     def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] =
         startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ =>
             val now = U.now()
@@ -148,8 +155,8 @@ object NCSuggestSynonymManager extends NCService {
                     try {
                         require(
                             m.containsKey("macros") &&
-                            m.containsKey("synonyms") &&
-                            m.containsKey("samples")
+                                m.containsKey("synonyms") &&
+                                m.containsKey("samples")
                         )
 
                         val mdlMacros = m.get("macros").
@@ -187,7 +194,7 @@ object NCSuggestSynonymManager extends NCService {
                             if (allSamplesCnt < MIN_CNT_MODEL)
                                 warns +=
                                     s"Model has too few ($allSamplesCnt) intents samples. " +
-                                    s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
+                                        s"Try to increase overall sample count to at least $MIN_CNT_MODEL."
 
                             else {
                                 val ids =
@@ -198,7 +205,7 @@ object NCSuggestSynonymManager extends NCService {
                                 if (ids.nonEmpty)
                                     warns +=
                                         s"Following model intent have too few samples (${ids.mkString(", ")}). " +
-                                        s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
+                                            s"Try to increase overall sample count to at least $MIN_CNT_INTENT."
                             }
 
                             val parser = new NCMacroParser()
@@ -212,15 +219,18 @@ object NCSuggestSynonymManager extends NCService {
                                 flatMap { case (_, samples) => samples }.
                                 map(ex => SEPARATORS.foldLeft(ex)((s, ch) => s.replaceAll(s"\\$ch", s" $ch "))).
                                 map(ex => {
-                                    val seq = ex.split(" ")
+                                    val seq = splitAndNormalize(ex)
 
                                     seq -> seq.map(toStemWord)
                                 }).
                                 toMap
 
                             val elmSyns =
-                                mdlSyns.map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
-                                    map { case (id, seq) => id -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
+                                mdlSyns.
+                                    map { case (elmId, syns) => elmId -> syns.flatMap(parser.expand) }.
+                                    map { case (elmId, syns) => elmId -> syns.filter(isSuitable4Suggestion) }.
+                                    filter { case (_, syns) => syns.nonEmpty }.
+                                    map { case (elmId, seq) => elmId -> seq.map(txt => split(txt).map(p => Word(p, toStemWord(p)))) }
 
                             val allReqs =
                                 elmSyns.map {
@@ -276,7 +286,7 @@ object NCSuggestSynonymManager extends NCService {
                                 s"exs=${exs.size}, " +
                                 s"syns=$allSynsCnt, " +
                                 s"reqs=$allReqsCnt" +
-                            s"]")
+                                s"]")
 
                             if (allReqsCnt == 0)
                                 onError(s"Suggestions cannot be generated for model: '$mdlId'")
@@ -441,19 +451,19 @@ object NCSuggestSynonymManager extends NCService {
         }
 
     /**
-     *
-     * @param parent Optional parent span.
-     * @return
-     */
+      *
+      * @param parent Optional parent span.
+      * @return
+      */
     override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ =>
         ackStarting()
         ackStarted()
     }
 
     /**
-     *
-     * @param parent Optional parent span.
-     */
+      *
+      * @param parent Optional parent span.
+      */
     override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ =>
         ackStopping()
         ackStopped()
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 2bbc9cb..387e8b0 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -17,7 +17,7 @@
 
 package org.apache.nlpcraft.server.rest
 
-import org.apache.nlpcraft.model.NCElement
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCResult}
 import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.Test
@@ -25,10 +25,22 @@ import org.junit.jupiter.api.Test
 import java.util
 import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SetHasAsJava, SetHasAsScala}
 
+class RestTestModelExt1 extends RestTestModel {
+    @NCIntent("intent=onX term(t)={# == 'a'}")
+    @NCIntentSample(Array(
+        "oh, cat will feel happy",
+        "oh , cat will feel happy",
+        "oh  cat will feel happy"
+    ))
+    private def x(): NCResult = NCResult.text("OK")
+
+    override def getElements: util.Set[NCElement] =
+        (super.getElements.asScala ++ Set(NCTestElement("cat", "cat", "{^^{is_alphanum(tok_txt)}^^}[1,3]"))).asJava
+}
 /**
   * Note that context word server should be started.
   */
-@NCTestEnvironment(model = classOf[RestTestModel], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt1], startClient = false)
 class NCRestModelSpec1 extends NCRestSpec {
     @Test
     def testSugsyn(): Unit = {
@@ -57,13 +69,24 @@ class NCRestModelSpec1 extends NCRestSpec {
             })
         )
 
+        post("model/sugsyn", "mdlId" -> "rest.test.model", "minScore" -> 0.5)(
+            ("$.status", (status: String) => assertEquals("API_OK", status)),
+            ("$.result.suggestions[:1].cat.*", (data: JList[java.util.Map[String, Object]]) => {
+                val scores = extract(data)
+
+                assertTrue(scores.nonEmpty)
+                assertTrue(scores.forall(s => s >= 0.5 && s <= 1))
+            })
+        )
+
+
         postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "UNKNOWN")
         postError("model/sugsyn", 400, "NC_INVALID_FIELD", "mdlId" -> "rest.test.model", "minScore" -> 2)
         postError("model/sugsyn", 400, "NC_ERROR")
     }
 }
 
-class RestTestModelExt extends RestTestModel {
+class RestTestModelExt2 extends RestTestModel {
     override def getMacros: util.Map[String, String] = {
         Map(
             "<M1>" -> "mtest1 {x|_}",
@@ -90,7 +113,7 @@ class RestTestModelExt extends RestTestModel {
 /**
   *
   */
-@NCTestEnvironment(model = classOf[RestTestModelExt], startClient = false)
+@NCTestEnvironment(model = classOf[RestTestModelExt2], startClient = false)
 class NCRestModelSpec2 extends NCRestSpec {
     @Test
     def testSyns(): Unit = {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
index 0cb519e..8fa5b15 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/RestTestModel.scala
@@ -55,15 +55,15 @@ class RestTestModel extends NCModelAdapter("rest.test.model", "REST test model",
 
     @NCIntent("intent=onA term(t)={# == 'a'}")
     @NCIntentSample(Array("My A"))
-    private def a(): NCResult = NCResult.text("OK")
+    def a(): NCResult = NCResult.text("OK")
 
     @NCIntent("intent=onB term(t)={# == 'b'}")
     @NCIntentSample(Array("My B"))
-    private def b(): NCResult = NCResult.text("OK")
+    def b(): NCResult = NCResult.text("OK")
 
     @NCIntent("intent=onMeta term(t)={# == 'meta'}")
     @NCIntentSample(Array("meta"))
-    private def meta(): NCResult = {
+    def meta(): NCResult = {
         val res = NCResult.text("OK")
 
         res.getMetadata.put(K1, V1)