You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/06/12 08:55:11 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated (6c5064f -> 7479b7f)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 6c5064f  WIP.
     add 73ce788  NLPCRAFT-67: update README, add multiple indexes, add ftext_score parameter
     new f9c9ae4  Merge branch 'NLPCRAFT-67' into NLPCRAFT-41
     new 7479b7f  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 nlpcraft/src/main/python/ctxword/README.md         |  9 ++--
 nlpcraft/src/main/python/ctxword/bertft/bertft.py  | 36 ++++++++-------
 .../ctxword/jupyter/Trasnsformers-FastText.ipynb   |  5 +--
 nlpcraft/src/main/python/ctxword/server.py         |  9 ++--
 .../tools/suggestions/NCSuggestionsGenerator.scala | 52 +++++++++++-----------
 5 files changed, 58 insertions(+), 53 deletions(-)


[incubator-nlpcraft] 01/02: Merge branch 'NLPCRAFT-67' into NLPCRAFT-41

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit f9c9ae4e203b761dc01ffd0a8d19f7cb1bad160b
Merge: 6c5064f 73ce788
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Fri Jun 12 09:08:39 2020 +0300

    Merge branch 'NLPCRAFT-67' into NLPCRAFT-41

 nlpcraft/src/main/python/ctxword/README.md         |  9 ++++--
 nlpcraft/src/main/python/ctxword/bertft/bertft.py  | 36 ++++++++++++----------
 .../ctxword/jupyter/Trasnsformers-FastText.ipynb   |  5 ++-
 nlpcraft/src/main/python/ctxword/server.py         |  9 +++---
 4 files changed, 33 insertions(+), 26 deletions(-)


[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 7479b7f0076b396b2f4ecc1f72f1ecf7c1d0d0e6
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Fri Jun 12 11:55:01 2020 +0300

    WIP.
---
 .../tools/suggestions/NCSuggestionsGenerator.scala | 52 +++++++++++-----------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
index a1fddc9..17d70de 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
@@ -41,10 +41,10 @@ import scala.collection._
 case class ParametersHolder(modelPath: String, url: String, limit: Int, minScore: Double, debug: Boolean)
 
 object NCSuggestionsGeneratorImpl {
-    case class Suggestion(word: String, index1: Double, index2: Double, index3: Double)
+    case class Suggestion(word: String, totalScore: Double, fastTextScore: Double, bertScore: Double)
 
     case class RequestData(sentence: String, example: String, elementId: String, index: Int)
-    case class RestRequest(sentences: java.util.List[java.util.List[Any]], limit: Int, simple: Boolean = false)
+    case class RestRequest(sentences: java.util.List[java.util.List[Any]], limit: Int, min_score: Double, simple: Boolean = false)
     case class Word(word: String, stem: String) {
         require(!word.contains(" "), s"Word cannot contains spaces: $word")
         require(
@@ -82,11 +82,11 @@ object NCSuggestionsGeneratorImpl {
                         else
                             p.asScala.tail.map(p ⇒
                                 Suggestion(
-                                    word = p.get(2).asInstanceOf[String],
-                                    index1 = p.get(0).asInstanceOf[Double],
-                                    index2 = p.get(1).asInstanceOf[Double],
-                                    index3 = p.get(3).asInstanceOf[Double]
-                                ),
+                                    word = p.get(0).asInstanceOf[String],
+                                    totalScore = p.get(1).asInstanceOf[Double],
+                                    fastTextScore = p.get(2).asInstanceOf[Double],
+                                    bertScore = p.get(3).asInstanceOf[Double]
+                                )
                             )
                     )
 
@@ -198,7 +198,9 @@ object NCSuggestionsGeneratorImpl {
                             GSON.toJson(
                                 RestRequest(
                                     sentences = batch.map(p ⇒ Seq(p.sentence, p.index).asJava).asJava,
-                                    limit = data.limit)
+                                    min_score = data.minScore,
+                                    limit = data.limit
+                                )
                             ),
                             "UTF-8"
                         )
@@ -242,22 +244,23 @@ object NCSuggestionsGeneratorImpl {
 
         val filteredSuggs =
             allSuggs.asScala.map {
-                case (elemId, elemSuggs) ⇒ elemId → elemSuggs.asScala.filter(_.index1 >= data.minScore)
+                case (elemId, elemSuggs) ⇒ elemId → elemSuggs.asScala.filter(_.totalScore >= data.minScore)
             }.filter(_._2.nonEmpty)
 
-        val avgScores = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → (suggs.map(_.index1).sum / suggs.size) }
+        val avgScores = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → (suggs.map(_.totalScore).sum / suggs.size) }
         val counts = filteredSuggs.map { case (elemId, suggs) ⇒ elemId → suggs.size }
 
         val tbl = NCAsciiTable()
 
+        // TODO: which columns do we need?
         tbl #= (
             "Element",
             "Suggestion",
-            "Summary factor",
+            "Summary Score",
             "Count",
-            "F1",
-            "F2",
-            "F3"
+            "ContextWord Score",
+            "FastText Score",
+            "Bert Score"
         )
 
         filteredSuggs.
@@ -267,7 +270,7 @@ object NCSuggestionsGeneratorImpl {
                     groupBy { case (_, stem) ⇒ stem }.
                     filter { case (stem, _) ⇒ !allSynsStems.contains(stem) }.
                     map { case (_, group) ⇒
-                        val seq = group.map { case (sugg, _) ⇒ sugg }.sortBy(-_.index1)
+                        val seq = group.map { case (sugg, _) ⇒ sugg }.sortBy(-_.totalScore)
 
                         // Drops repeated.
                         (seq.head, seq.length)
@@ -277,7 +280,7 @@ object NCSuggestionsGeneratorImpl {
                 val normFactor = seq.map(_._2).sum.toDouble / seq.size / avgScores(elemId)
 
                 seq.
-                    map { case (sugg, cnt) ⇒ (sugg, cnt, sugg.index1 * normFactor * cnt.toDouble / counts(elemId)) }.
+                    map { case (sugg, cnt) ⇒ (sugg, cnt, sugg.totalScore * normFactor * cnt.toDouble / counts(elemId)) }.
                     sortBy { case (_, _, cumFactor) ⇒ -cumFactor }.
                     zipWithIndex.
                     foreach { case ((sugg, cnt, cumFactor), sugIdx) ⇒
@@ -288,9 +291,9 @@ object NCSuggestionsGeneratorImpl {
                             sugg.word,
                             f(cumFactor),
                             cnt,
-                            f(sugg.index1),
-                            f(sugg.index2),
-                            f(sugg.index3)
+                            f(sugg.totalScore),
+                            f(sugg.fastTextScore),
+                            f(sugg.bertScore)
                         )
                     }
             }
@@ -325,7 +328,7 @@ object NCSuggestionsGeneratorImpl {
 
 object NCSuggestionsGenerator extends App {
     private lazy val DFLT_URL: String = "http://localhost:5000/suggestions"
-    private lazy val DFLT_LIMIT: Int = 10 // TODO: add scoreLimit
+    private lazy val DFLT_LIMIT: Int = 10
     private lazy val DFLT_MIN_SCORE: Double = 0
     private lazy val DFLT_DEBUG: Boolean = false
 
@@ -335,13 +338,8 @@ object NCSuggestionsGenerator extends App {
       */
     private def errorExit(msg: String = null): Unit = {
         if (msg != null)
-            System.err.println(
-                s"""
-                   |ERROR:
-                   |    $msg""".stripMargin
-            )
-
-        if (msg == null)
+            System.err.println(s"ERROR: $msg")
+        else
             System.err.println(
                 s"""
                    |NAME: