You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/06/10 14:05:51 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
     new 6c5064f  WIP.
6c5064f is described below

commit 6c5064f1a44d72e45e57673d6f629be2bd092445
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Wed Jun 10 17:05:45 2020 +0300

    WIP.
---
 .../tools/suggestions/NCSuggestionsGenerator.scala | 51 ++++++++--------------
 1 file changed, 18 insertions(+), 33 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
index b6c2a37..a1fddc9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/suggestions/NCSuggestionsGenerator.scala
@@ -38,20 +38,24 @@ import org.apache.nlpcraft.model.NCModelFileAdapter
 import scala.collection.JavaConverters._
 import scala.collection._
 
-case class ParametersHolder(
-    modelPath: String,
-    url: String,
-    limit: Int,
-    minScore: Double,
-    synonymsWords: Int,
-    debug: Boolean
-)
+case class ParametersHolder(modelPath: String, url: String, limit: Int, minScore: Double, debug: Boolean)
 
 object NCSuggestionsGeneratorImpl {
     case class Suggestion(word: String, index1: Double, index2: Double, index3: Double)
 
     case class RequestData(sentence: String, example: String, elementId: String, index: Int)
     case class RestRequest(sentences: java.util.List[java.util.List[Any]], limit: Int, simple: Boolean = false)
+    case class Word(word: String, stem: String) {
+        require(!word.contains(" "), s"Word cannot contains spaces: $word")
+        require(
+            word.forall(ch ⇒
+                ch.isLetterOrDigit ||
+                    ch == '\'' ||
+                    SEPARATORS.contains(ch)
+            ),
+            s"Unsupported symbols: $word"
+        )
+    }
 
     private final val GSON = new Gson
     private final val TYPE_RESP = new TypeToken[java.util.List[java.util.List[java.util.List[Any]]]]() {}.getType
@@ -119,20 +123,6 @@ object NCSuggestionsGeneratorImpl {
         if (mdl.getMacros != null)
             mdl.getMacros.asScala.foreach { case (name, str) ⇒ parser.addMacro(name, str) }
 
-        val client = HttpClients.createDefault
-
-        case class Word(word: String, stem: String) {
-            require(!word.contains(" "), s"Word cannot contains spaces: $word")
-            require(
-                word.forall(ch ⇒
-                    ch.isLetterOrDigit ||
-                        ch == '\'' ||
-                        SEPARATORS.contains(ch)
-                ),
-                s"Unsupported symbols: $word"
-            )
-        }
-
         val examples =
             mdl.getExamples.asScala.
                 map(ex ⇒ SEPARATORS.foldLeft(ex)((s, ch) ⇒ s.replaceAll(s"\\$ch", s" $ch "))).
@@ -150,7 +140,7 @@ object NCSuggestionsGeneratorImpl {
         val allReqs =
             elemSyns.map {
                 case (elemId, syns) ⇒
-                    val normSyns: Seq[Seq[Word]] = syns.filter(_.size <= data.synonymsWords)
+                    val normSyns: Seq[Seq[Word]] = syns.filter(_.size == 1)
                     val synsStems = normSyns.map(_.map(_.stem))
                     val synsWords = normSyns.map(_.map(_.word))
 
@@ -177,8 +167,8 @@ object NCSuggestionsGeneratorImpl {
                                 )
                             }
 
-                            (for (idx ← exampleIdxs; (synStems, i) ← synsStems.zipWithIndex) yield mkRequestData(idx, synStems, i)).
-                                distinct
+                            (for (idx ← exampleIdxs; (synStems, i) ← synsStems.zipWithIndex)
+                                yield mkRequestData(idx, synStems, i)).distinct
                         }
 
                     elemId → reqs.toSet
@@ -195,6 +185,8 @@ object NCSuggestionsGeneratorImpl {
         val debugs = mutable.HashMap.empty[RequestData, Seq[Suggestion]]
         val cnt = new AtomicInteger(0)
 
+        val client = HttpClients.createDefault
+
         for ((elemId, reqs) ← allReqs; batch ← reqs.sliding(BATCH_SIZE, BATCH_SIZE).map(_.toSeq)) {
             NCUtils.asFuture(
                 _ ⇒ {
@@ -335,7 +327,6 @@ object NCSuggestionsGenerator extends App {
     private lazy val DFLT_URL: String = "http://localhost:5000/suggestions"
     private lazy val DFLT_LIMIT: Int = 10 // TODO: add scoreLimit
     private lazy val DFLT_MIN_SCORE: Double = 0
-    private lazy val DFLT_SYNONYMNS_WORDS: Int = 1
     private lazy val DFLT_DEBUG: Boolean = false
 
     /**
@@ -386,10 +377,6 @@ object NCSuggestionsGenerator extends App {
                |        Optional minimal suggestion score value.
                |        Default is $DFLT_MIN_SCORE.
                |
-               |    [--syns|-s] synonyms count
-               |        Optional words count which defined which synonyms words count supported.
-               |        Default is $DFLT_SYNONYMNS_WORDS.
-               |
                |    [--debug|-d] [true|false]
                |        Optional flag on whether or not to debug output.
                |        Default is $DFLT_DEBUG.
@@ -466,7 +453,6 @@ object NCSuggestionsGenerator extends App {
         var url = DFLT_URL
         var limit = DFLT_LIMIT
         var minScore = DFLT_MIN_SCORE
-        var synsWords = DFLT_SYNONYMNS_WORDS
         var debug = DFLT_DEBUG
 
         var i = 0
@@ -481,7 +467,6 @@ object NCSuggestionsGenerator extends App {
                     case "--url" | "-u" ⇒ url = v
                     case "--limit" | "-l" ⇒ limit = parseNum(v, k, (s: String) ⇒ s.toInt, 1, Integer.MAX_VALUE)
                     case "--score" | "-c" ⇒ minScore = parseNum(v, k, (s: String) ⇒ s.toDouble, 0, Integer.MAX_VALUE)
-                    case "--syns" | "-s" ⇒ synsWords = parseNum(v, k, (s: String) ⇒ s.toInt, 1, Integer.MAX_VALUE)
                     case "--debug" | "-d" ⇒ debug = parseBoolean(v, k)
 
                     case _ ⇒ throw new IllegalArgumentException(s"Invalid argument: ${cmdArgs(i)}")
@@ -496,7 +481,7 @@ object NCSuggestionsGenerator extends App {
             case e: Exception ⇒ errorExit(e.getMessage)
         }
 
-        ParametersHolder(mdlPath, url, limit, minScore, synsWords, debug)
+        ParametersHolder(mdlPath, url, limit, minScore, debug)
     }
 
     NCSuggestionsGeneratorImpl.process(parseCmdParameters(args))