You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/26 09:42:40 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated (9897281 -> 8fac9cc)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 9897281  Merge branch 'master' into NLPCRAFT-41
     new ef4e0cc  WIP.
     new 8fac9cc  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../model/tools/synonyms/NCSynonymsGenerator.scala | 46 ++++++++++++++--------
 1 file changed, 29 insertions(+), 17 deletions(-)


[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit ef4e0cc7659319aee10d5a036909709965690fd5
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 12:15:20 2020 +0300

    WIP.
---
 .../model/tools/synonyms/NCSynonymsGenerator.scala | 59 +++++++++++++++++-----
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index 20b0f18..a94b11d 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -17,6 +17,8 @@
 package org.apache.nlpcraft.model.tools.synonyms
 
 import java.lang.reflect.Type
+import java.util
+import java.util.concurrent.CopyOnWriteArrayList
 
 import com.google.gson.Gson
 import com.google.gson.reflect.TypeToken
@@ -29,6 +31,7 @@ import org.apache.http.util.EntityUtils
 import org.apache.nlpcraft.common.ascii.NCAsciiTable
 import org.apache.nlpcraft.common.makro.NCMacroParser
 import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
+import org.apache.nlpcraft.common.util.NCUtils
 import org.apache.nlpcraft.model.NCModelFileAdapter
 
 import scala.collection._
@@ -120,31 +123,25 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
             mdl.getElements.asScala.map(e ⇒ e.getId → e.getSynonyms.asScala.flatMap(parser.expand)).
                 map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(p ⇒ Word(p, toStemWord(p))))}.toMap
 
-        val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
-            new (String ⇒ Seq[Suggestion]) {
-                override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
-            }
-        )
 
-        val allSuggs =
+        val allSens: Map[String, Seq[String]] =
             elemSyns.map {
                 case (elemId, elemSyns) ⇒
                     val elemSingleSyns = elemSyns.filter(_.size == 1).map(_.head)
                     val elemStems = elemSingleSyns.map(_.stem)
 
-                    val hs: Seq[Suggestion] =
+                    val hs =
                         examples.flatMap(example ⇒ {
                             val exStems = example.map(_.stem)
                             val idxs = exStems.flatMap(s ⇒ if (elemStems.contains(s)) Some(exStems.indexOf(s)) else None)
 
                             if (idxs.nonEmpty)
                                 elemSingleSyns.map(_.word).flatMap(syn ⇒
-                                    idxs.flatMap(idx ⇒
-                                        cache(
-                                            example.
-                                            zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
-                                            zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
-                                            mkString(" "))
+                                    idxs.map(idx ⇒
+                                        example.
+                                        zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
+                                        zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
+                                        mkString(" ")
                                     )
                                 )
                             else
@@ -154,13 +151,47 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
                     elemId → hs
             }.filter(_._2.nonEmpty)
 
+//        val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
+//            new (String ⇒ Seq[Suggestion]) {
+//                override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
+//            }
+//        )
+
+        val cache = new java.util.concurrent.ConcurrentHashMap[String, Seq[Suggestion]] ()
+
+        val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, java.util.List[Suggestion]] ()
+
+        for ((elemId, sens) <- allSens; sen <- sens) {
+            NCUtils.asFuture(
+                () ⇒ {
+                    val senSuggs: Seq[Suggestion] = cache.computeIfAbsent(
+                        sen,
+                        new Function[String, Seq[Suggestion]]() {
+                            override def apply(v1: String): Seq[Suggestion] = ask(client, sen)
+                        }
+                    )
+
+                    val elemSugs: util.List[Suggestion] = allSuggs.computeIfAbsent(
+                        elemId,
+                        new Function[String, util.List[Suggestion]]() {
+                            override def apply(v1: String): util.List[Suggestion] = new CopyOnWriteArrayList[Suggestion]()
+                        }
+                    )
+
+                    elemSugs.addAll(senSuggs)
+                },
+                (t: Throwable) ⇒ (),
+                (t: Throwable) ⇒ ()
+            )
+        }
+
         val allSynsStems = elemSyns.flatMap(_._2).flatten.map(_.stem).toSet
 
         val table = NCAsciiTable()
 
         table #= ("Element", "Suggestions")
 
-        allSuggs.foreach { case (elemId, elemSuggs) ⇒
+        allSuggs.asScala.map { case (id, elemSuggs) ⇒ id → elemSuggs.asScala}.foreach { case (elemId, elemSuggs) ⇒
             elemSuggs.
                 map(sugg ⇒ (sugg, toStem(sugg.word))).
                 groupBy { case (_, stem) ⇒ stem }.


[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 8fac9ccfc8a81987a36c8a0689b2603c320eff1e
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 12:42:30 2020 +0300

    WIP.
---
 .../model/tools/synonyms/NCSynonymsGenerator.scala | 45 +++++++---------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index a94b11d..e1d8a9b 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -17,8 +17,7 @@
 package org.apache.nlpcraft.model.tools.synonyms
 
 import java.lang.reflect.Type
-import java.util
-import java.util.concurrent.CopyOnWriteArrayList
+import java.util.concurrent.{CopyOnWriteArrayList, CountDownLatch, TimeUnit}
 
 import com.google.gson.Gson
 import com.google.gson.reflect.TypeToken
@@ -34,8 +33,8 @@ import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
 import org.apache.nlpcraft.common.util.NCUtils
 import org.apache.nlpcraft.model.NCModelFileAdapter
 
-import scala.collection._
 import scala.collection.JavaConverters._
+import scala.collection._
 
 case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double) {
     // TODO: all string fields
@@ -77,7 +76,6 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
     private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
     private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
 
-    // TODO: multithreading.
     private def ask(client: CloseableHttpClient, sen: String): Seq[Suggestion] = {
         val post = new HttpPost(url)
 
@@ -105,7 +103,7 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
             require(
                 word.forall(ch ⇒
                     ch.isLetterOrDigit ||
-                    ch == ''' ||
+                    ch == '\'' ||
                     SEPARATORS.contains(ch)
                 ),
                 s"Unsupported symbols: $word"
@@ -151,39 +149,22 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
                     elemId → hs
             }.filter(_._2.nonEmpty)
 
-//        val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
-//            new (String ⇒ Seq[Suggestion]) {
-//                override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
-//            }
-//        )
-
         val cache = new java.util.concurrent.ConcurrentHashMap[String, Seq[Suggestion]] ()
-
         val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, java.util.List[Suggestion]] ()
 
-        for ((elemId, sens) <- allSens; sen <- sens) {
-            NCUtils.asFuture(
-                () ⇒ {
-                    val senSuggs: Seq[Suggestion] = cache.computeIfAbsent(
-                        sen,
-                        new Function[String, Seq[Suggestion]]() {
-                            override def apply(v1: String): Seq[Suggestion] = ask(client, sen)
-                        }
-                    )
+        val cdl = new CountDownLatch(allSens.map { case (_, seq) ⇒ seq.size }.sum)
 
-                    val elemSugs: util.List[Suggestion] = allSuggs.computeIfAbsent(
-                        elemId,
-                        new Function[String, util.List[Suggestion]]() {
-                            override def apply(v1: String): util.List[Suggestion] = new CopyOnWriteArrayList[Suggestion]()
-                        }
-                    )
-
-                    elemSugs.addAll(senSuggs)
+        for ((elemId, sens) ← allSens; sen ← sens)
+            NCUtils.asFuture(
+                _ ⇒ {
+                    allSuggs.computeIfAbsent(elemId, (_: String) ⇒ new CopyOnWriteArrayList[Suggestion]()).
+                        addAll(cache.computeIfAbsent(sen, (_: String) ⇒ ask(client, sen)).asJava)
                 },
-                (t: Throwable) ⇒ (),
-                (t: Throwable) ⇒ ()
+                (_: Throwable) ⇒ cdl.countDown(),
+                (_: Boolean) ⇒ cdl.countDown()
             )
-        }
+
+        cdl.await(Long.MaxValue, TimeUnit.MILLISECONDS)
 
         val allSynsStems = elemSyns.flatMap(_._2).flatten.map(_.stem).toSet