You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/06/14 09:05:33 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated (0fe0461 -> c2ddeb5)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 0fe0461  WIP.
     add 51faecf  NLPCRAFT-67: Change json response/request schema
     add 07da3df  WIP on NLPCRAFT-80.
     add 354b982  Merge branch 'master' into NLPCRAFT-67
     new a189ea2  Merge branch 'NLPCRAFT-67' into NLPCRAFT-41
     new c2ddeb5  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 nlpcraft/src/main/python/ctxword/README.md         |  8 ++---
 nlpcraft/src/main/python/ctxword/bertft/bertft.py  |  5 ++-
 nlpcraft/src/main/python/ctxword/bin/predict.sh    |  2 +-
 nlpcraft/src/main/python/ctxword/server.py         | 11 ++++--
 .../apache/nlpcraft/examples/sql/SqlModel.scala    | 16 ++++++---
 .../scala/org/apache/nlpcraft/model/NCIntent.java  |  1 +
 .../{NCIntentTerm.java => NCIntentExample.java}    | 21 ++++++-----
 .../org/apache/nlpcraft/model/NCIntentRef.java     |  1 +
 .../org/apache/nlpcraft/model/NCIntentTerm.java    |  1 +
 .../model/intent/impl/NCIntentScanner.scala        | 18 +++++-----
 .../tools/suggestions/NCSuggestionsGenerator.scala | 42 +++++++++-------------
 11 files changed, 71 insertions(+), 55 deletions(-)
 copy nlpcraft/src/main/scala/org/apache/nlpcraft/model/{NCIntentTerm.java => NCIntentExample.java} (68%)


[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit c2ddeb515d63355dd89172f909e28e084a8ef7dd
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Jun 14 12:05:22 2020 +0300

    WIP.
---
 .../tools/suggestions/NCSuggestionsGenerator.scala | 42 +++++++++-------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
index bbaecf0..87c779e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
@@ -35,17 +35,18 @@ import org.apache.nlpcraft.common.util.NCUtils
 import org.apache.nlpcraft.common.version.NCVersion
 import org.apache.nlpcraft.model.NCModelFileAdapter
 
+import java.util.{List ⇒ JList}
+
 import scala.collection.JavaConverters._
 import scala.collection._
 
 case class ParametersHolder(modelPath: String, url: String, limit: Int, minScore: Double, debug: Boolean)
 
 object NCSuggestionsGeneratorImpl {
-    // Bert score, FText score skipped here because user doesn't need it for analyze.
-    case class Suggestion(word: String, totalScore: Double)
-
+    case class Suggestion(word: String, score: Double)
     case class RequestData(sentence: String, example: String, elementId: String, index: Int)
-    case class RestRequest(sentences: java.util.List[java.util.List[Any]], limit: Int, min_score: Double)
+    case class RestRequestSentence(text: String, indexes: JList[Int])
+    case class RestRequest(sentences: JList[RestRequestSentence], limit: Int, min_score: Double)
     case class Word(word: String, stem: String) {
         require(!word.contains(" "), s"Word cannot contains spaces: $word")
         require(
@@ -59,9 +60,9 @@ object NCSuggestionsGeneratorImpl {
     }
 
     private final val GSON = new Gson
-    private final val TYPE_RESP = new TypeToken[java.util.List[java.util.List[java.util.List[Any]]]]() {}.getType
+    private final val TYPE_RESP = new TypeToken[JList[JList[Suggestion]]]() {}.getType
     private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
-    private final val BATCH_SIZE = 100
+    private final val BATCH_SIZE = 20
 
     private final val HANDLER: ResponseHandler[Seq[Seq[Suggestion]]] =
         (resp: HttpResponse) ⇒ {
@@ -75,18 +76,9 @@ object NCSuggestionsGeneratorImpl {
 
             code match {
                 case 200 ⇒
-                    val data: java.util.List[java.util.List[java.util.List[Any]]] = GSON.fromJson(js, TYPE_RESP)
-
-                    data.asScala.map(p ⇒
-                        if (p.isEmpty)
-                            Seq.empty
-                        else
-                            p.asScala.tail.map(p ⇒
-                                Suggestion(
-                                    word = p.get(0).asInstanceOf[String], totalScore = p.get(1).asInstanceOf[Double]
-                                )
-                            )
-                    )
+                    val data: JList[JList[Suggestion]] = GSON.fromJson(js, TYPE_RESP)
+
+                    data.asScala.map(p ⇒ if (p.isEmpty) Seq.empty else p.asScala.tail)
 
                 case 400 ⇒ throw new RuntimeException(js)
                 case _ ⇒ throw new RuntimeException(s"Unexpected response [code=$code, response=$js]")
@@ -178,7 +170,7 @@ object NCSuggestionsGeneratorImpl {
         println(s"Synonyms count: ${elemSyns.map(_._2.size).sum}")
         println(s"Request prepared: $allReqsCnt")
 
-        val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, java.util.List[Suggestion]]()
+        val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, JList[Suggestion]]()
         val cdl = new CountDownLatch(1)
         val debugs = mutable.HashMap.empty[RequestData, Seq[Suggestion]]
         val cnt = new AtomicInteger(0)
@@ -195,7 +187,7 @@ object NCSuggestionsGeneratorImpl {
                         new StringEntity(
                             GSON.toJson(
                                 RestRequest(
-                                    sentences = batch.map(p ⇒ Seq(p.sentence, p.index).asJava).asJava,
+                                    sentences = batch.map(p ⇒ RestRequestSentence(p.sentence, Seq(p.index).asJava)).asJava,
                                     min_score = data.minScore,
                                     limit = data.limit
                                 )
@@ -242,10 +234,10 @@ object NCSuggestionsGeneratorImpl {
 
         val filteredSuggs =
             allSuggs.asScala.map {
-                case (elemId, elemSuggs) ⇒ elemId → elemSuggs.asScala.filter(_.totalScore >= data.minScore)
+                case (elemId, elemSuggs) ⇒ elemId → elemSuggs.asScala.filter(_.score >= data.minScore)
             }.filter(_._2.nonEmpty)
 
-        val avgScores = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → (suggs.map(_.totalScore).sum / suggs.size) }
+        val avgScores = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → (suggs.map(_.score).sum / suggs.size) }
         val counts = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → suggs.size }
 
         val tbl = NCAsciiTable()
@@ -265,7 +257,7 @@ object NCSuggestionsGeneratorImpl {
                     groupBy { case (_, stem) ⇒ stem }.
                     filter { case (stem, _) ⇒ !allSynsStems.contains(stem) }.
                     map { case (_, group) ⇒
-                        val seq = group.map { case (sugg, _) ⇒ sugg }.sortBy(-_.totalScore)
+                        val seq = group.map { case (sugg, _) ⇒ sugg }.sortBy(-_.score)
 
                         // Drops repeated.
                         (seq.head, seq.length)
@@ -275,7 +267,7 @@ object NCSuggestionsGeneratorImpl {
                 val normFactor = seq.map(_._2).sum.toDouble / seq.size / avgScores(elemId)
 
                 seq.
-                    map { case (sugg, cnt) ⇒ (sugg, cnt, sugg.totalScore * normFactor * cnt.toDouble / counts(elemId)) }.
+                    map { case (sugg, cnt) ⇒ (sugg, cnt, sugg.score * normFactor * cnt.toDouble / counts(elemId)) }.
                     sortBy { case (_, _, cumFactor) ⇒ -cumFactor }.
                     zipWithIndex.
                     foreach { case ((sugg, cnt, cumFactor), sugIdx) ⇒
@@ -284,7 +276,7 @@ object NCSuggestionsGeneratorImpl {
                         tbl += (
                             if (sugIdx == 0) elemId else " ",
                             sugg.word,
-                            f(sugg.totalScore),
+                            f(sugg.score),
                             cnt,
                             f(cumFactor)
                         )


[incubator-nlpcraft] 01/02: Merge branch 'NLPCRAFT-67' into NLPCRAFT-41

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit a189ea26f1a18505ab21463045f9324fabf542f2
Merge: 0fe0461 354b982
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Jun 14 11:47:20 2020 +0300

    Merge branch 'NLPCRAFT-67' into NLPCRAFT-41

 nlpcraft/src/main/python/ctxword/README.md          |  8 ++++----
 nlpcraft/src/main/python/ctxword/bertft/bertft.py   |  5 ++++-
 nlpcraft/src/main/python/ctxword/bin/predict.sh     |  2 +-
 nlpcraft/src/main/python/ctxword/server.py          | 11 +++++++++--
 .../org/apache/nlpcraft/examples/sql/SqlModel.scala | 16 ++++++++++++----
 .../scala/org/apache/nlpcraft/model/NCIntent.java   |  1 +
 .../{NCIntentTerm.java => NCIntentExample.java}     | 21 ++++++++++++---------
 .../org/apache/nlpcraft/model/NCIntentRef.java      |  1 +
 .../org/apache/nlpcraft/model/NCIntentTerm.java     |  1 +
 .../model/intent/impl/NCIntentScanner.scala         | 18 +++++++++---------
 10 files changed, 54 insertions(+), 30 deletions(-)