You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/09/18 07:10:24 UTC

[incubator-nlpcraft] 09/12: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit d36a9920e6ef3bcb649ff7ef28b8faa79a0dea2e
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Fri Sep 18 09:30:56 2020 +0300

    WIP.
---
 .../server/sugsyn/NCSuggestSynonymManager.scala    | 34 +++++++++++++++-------
 .../nlpcraft/server/rest/NCRestModelSpec.scala     |  8 ++---
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index 9e4c358..b59f2dc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -106,7 +106,7 @@ object NCSuggestSynonymManager extends NCService {
             s"Unsupported symbols: $word"
         )
     }
-    case class SuggestionResult(synonym: String, ctxWordSrvScore: Double, sgstCnt: Int)
+    case class SuggestionResult(synonym: String, factor: Double)
 
     private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty)
     private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
@@ -191,7 +191,7 @@ object NCSuggestSynonymManager extends NCService {
                             promise.success(
                                 NCSuggestSynonymResult(
                                     modelId = mdlId,
-                                    minScore = minScore,
+                                    minScore = 0,
                                     durationMs = System.currentTimeMillis() - now,
                                     timestamp = now,
                                     error = err,
@@ -387,13 +387,13 @@ object NCSuggestSynonymManager extends NCService {
                                             val seq = group.map { case (sgst, _) ⇒ sgst }.sortBy(-_.score)
 
                                             // Drops repeated.
-                                            (seq.head, seq.length)
+                                            (seq.head.word, seq.length, seq.map(_.score).sum / seq.size)
                                         }.
                                         toSeq.
-                                        map { case (sgst, cnt) ⇒ (sgst, cnt, sgst.score * cnt / elemSgsts.size) }.
+                                        map { case (sgst, cnt, score) ⇒ (sgst, cnt, score * cnt / elemSgsts.size) }.
                                         sortBy { case (_, _, sumFactor) ⇒ -sumFactor }.
                                         zipWithIndex.
-                                        foreach { case ((sgst, cnt, _), _) ⇒
+                                        foreach { case ((word, _, sumFactor), _) ⇒
                                             val seq =
                                                 res.get(elemId) match {
                                                     case Some(seq) ⇒ seq
@@ -405,19 +405,31 @@ object NCSuggestSynonymManager extends NCService {
                                                         buf
                                                 }
 
-                                            seq += SuggestionResult(sgst.word, sgst.score, cnt)
+                                            seq += SuggestionResult(word, sumFactor)
                                         }
                                 }
 
                                 val resJ: util.Map[String, util.List[util.HashMap[String, Any]]] =
                                     res.map { case (id, data) ⇒
-                                        id → data.map(d ⇒ {
+                                        val factors = data.map(_.factor)
+
+                                        val min = factors.min
+                                        val max = factors.max
+                                        var delta = max - min
+
+                                        if (delta == 0)
+                                            delta = max
+
+                                        def normalize(v: Double): Double = (v - min) / delta
+
+                                        val norm = data.map(s ⇒ SuggestionResult(s.synonym, normalize(s.factor))).
+                                            filter(_.factor >= minScore)
+
+                                        id → norm.map(d ⇒ {
                                             val m = new util.HashMap[String, Any]()
 
-                                            m.put("synonym", d.synonym)
-                                            // ContextWord server range is (0, 2)
-                                            m.put("ctxWordServerScore", d.ctxWordSrvScore / 2)
-                                            m.put("suggestedCount", d.sgstCnt)
+                                            m.put("synonym", d.synonym.toLowerCase)
+                                            m.put("factor", d.factor)
 
                                             m
                                         }).asJava
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 2c4ef04..444d38a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -18,20 +18,20 @@
 package org.apache.nlpcraft.server.rest
 
 import org.apache.nlpcraft.NCTestEnvironment
-import org.apache.nlpcraft.examples.time.TimeModel
+import org.apache.nlpcraft.examples.alarm.AlarmModel
 import org.junit.jupiter.api.Assertions._
 import org.junit.jupiter.api.{Disabled, Test}
 
 // Enable it and run if context word server started.
 @Disabled
-@NCTestEnvironment(model = classOf[TimeModel], startClient = false)
+@NCTestEnvironment(model = classOf[AlarmModel], startClient = false)
 class NCRestModelSpec extends NCRestSpec {
     @Test
     def test(): Unit = {
-        post("model/sugsyn", "mdlId" → "nlpcraft.time.ex")(
+        post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex")(
             ("$.status", (status: String) ⇒ assertEquals("API_OK", status))
         )
-        post("model/sugsyn", "mdlId" → "nlpcraft.time.ex", "minScore" → 0.5)(
+        post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex", "minScore" → 0.5)(
             ("$.status", (status: String) ⇒ assertEquals("API_OK", status))
         )
     }