You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/09/18 07:10:24 UTC
[incubator-nlpcraft] 09/12: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit d36a9920e6ef3bcb649ff7ef28b8faa79a0dea2e
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Fri Sep 18 09:30:56 2020 +0300
WIP.
---
.../server/sugsyn/NCSuggestSynonymManager.scala | 34 +++++++++++++++-------
.../nlpcraft/server/rest/NCRestModelSpec.scala | 8 ++---
2 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index 9e4c358..b59f2dc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -106,7 +106,7 @@ object NCSuggestSynonymManager extends NCService {
s"Unsupported symbols: $word"
)
}
- case class SuggestionResult(synonym: String, ctxWordSrvScore: Double, sgstCnt: Int)
+ case class SuggestionResult(synonym: String, factor: Double)
private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty)
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
@@ -191,7 +191,7 @@ object NCSuggestSynonymManager extends NCService {
promise.success(
NCSuggestSynonymResult(
modelId = mdlId,
- minScore = minScore,
+ minScore = 0,
durationMs = System.currentTimeMillis() - now,
timestamp = now,
error = err,
@@ -387,13 +387,13 @@ object NCSuggestSynonymManager extends NCService {
val seq = group.map { case (sgst, _) ⇒ sgst }.sortBy(-_.score)
// Drops repeated.
- (seq.head, seq.length)
+ (seq.head.word, seq.length, seq.map(_.score).sum / seq.size)
}.
toSeq.
- map { case (sgst, cnt) ⇒ (sgst, cnt, sgst.score * cnt / elemSgsts.size) }.
+ map { case (sgst, cnt, score) ⇒ (sgst, cnt, score * cnt / elemSgsts.size) }.
sortBy { case (_, _, sumFactor) ⇒ -sumFactor }.
zipWithIndex.
- foreach { case ((sgst, cnt, _), _) ⇒
+ foreach { case ((word, _, sumFactor), _) ⇒
val seq =
res.get(elemId) match {
case Some(seq) ⇒ seq
@@ -405,19 +405,31 @@ object NCSuggestSynonymManager extends NCService {
buf
}
- seq += SuggestionResult(sgst.word, sgst.score, cnt)
+ seq += SuggestionResult(word, sumFactor)
}
}
val resJ: util.Map[String, util.List[util.HashMap[String, Any]]] =
res.map { case (id, data) ⇒
- id → data.map(d ⇒ {
+ val factors = data.map(_.factor)
+
+ val min = factors.min
+ val max = factors.max
+ var delta = max - min
+
+ if (delta == 0)
+ delta = max
+
+ def normalize(v: Double): Double = (v - min) / delta
+
+ val norm = data.map(s ⇒ SuggestionResult(s.synonym, normalize(s.factor))).
+ filter(_.factor >= minScore)
+
+ id → norm.map(d ⇒ {
val m = new util.HashMap[String, Any]()
- m.put("synonym", d.synonym)
- // ContextWord server range is (0, 2)
- m.put("ctxWordServerScore", d.ctxWordSrvScore / 2)
- m.put("suggestedCount", d.sgstCnt)
+ m.put("synonym", d.synonym.toLowerCase)
+ m.put("factor", d.factor)
m
}).asJava
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
index 2c4ef04..444d38a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala
@@ -18,20 +18,20 @@
package org.apache.nlpcraft.server.rest
import org.apache.nlpcraft.NCTestEnvironment
-import org.apache.nlpcraft.examples.time.TimeModel
+import org.apache.nlpcraft.examples.alarm.AlarmModel
import org.junit.jupiter.api.Assertions._
import org.junit.jupiter.api.{Disabled, Test}
// Enable it and run if context word server started.
@Disabled
-@NCTestEnvironment(model = classOf[TimeModel], startClient = false)
+@NCTestEnvironment(model = classOf[AlarmModel], startClient = false)
class NCRestModelSpec extends NCRestSpec {
@Test
def test(): Unit = {
- post("model/sugsyn", "mdlId" → "nlpcraft.time.ex")(
+ post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex")(
("$.status", (status: String) ⇒ assertEquals("API_OK", status))
)
- post("model/sugsyn", "mdlId" → "nlpcraft.time.ex", "minScore" → 0.5)(
+ post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex", "minScore" → 0.5)(
("$.status", (status: String) ⇒ assertEquals("API_OK", status))
)
}