You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/26 09:42:40 UTC
[incubator-nlpcraft] branch NLPCRAFT-41 updated (9897281 -> 8fac9cc)
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.
from 9897281 Merge branch 'master' into NLPCRAFT-41
new ef4e0cc WIP.
new 8fac9cc WIP.
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../model/tools/synonyms/NCSynonymsGenerator.scala | 46 ++++++++++++++--------
1 file changed, 29 insertions(+), 17 deletions(-)
[incubator-nlpcraft] 01/02: WIP.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit ef4e0cc7659319aee10d5a036909709965690fd5
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 12:15:20 2020 +0300
WIP.
---
.../model/tools/synonyms/NCSynonymsGenerator.scala | 59 +++++++++++++++++-----
1 file changed, 45 insertions(+), 14 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index 20b0f18..a94b11d 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model.tools.synonyms
import java.lang.reflect.Type
+import java.util
+import java.util.concurrent.CopyOnWriteArrayList
import com.google.gson.Gson
import com.google.gson.reflect.TypeToken
@@ -29,6 +31,7 @@ import org.apache.http.util.EntityUtils
import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
+import org.apache.nlpcraft.common.util.NCUtils
import org.apache.nlpcraft.model.NCModelFileAdapter
import scala.collection._
@@ -120,31 +123,25 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
mdl.getElements.asScala.map(e ⇒ e.getId → e.getSynonyms.asScala.flatMap(parser.expand)).
map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(p ⇒ Word(p, toStemWord(p))))}.toMap
- val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
- new (String ⇒ Seq[Suggestion]) {
- override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
- }
- )
- val allSuggs =
+ val allSens: Map[String, Seq[String]] =
elemSyns.map {
case (elemId, elemSyns) ⇒
val elemSingleSyns = elemSyns.filter(_.size == 1).map(_.head)
val elemStems = elemSingleSyns.map(_.stem)
- val hs: Seq[Suggestion] =
+ val hs =
examples.flatMap(example ⇒ {
val exStems = example.map(_.stem)
val idxs = exStems.flatMap(s ⇒ if (elemStems.contains(s)) Some(exStems.indexOf(s)) else None)
if (idxs.nonEmpty)
elemSingleSyns.map(_.word).flatMap(syn ⇒
- idxs.flatMap(idx ⇒
- cache(
- example.
- zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
- zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
- mkString(" "))
+ idxs.map(idx ⇒
+ example.
+ zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
+ zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
+ mkString(" ")
)
)
else
@@ -154,13 +151,47 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
elemId → hs
}.filter(_._2.nonEmpty)
+// val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
+// new (String ⇒ Seq[Suggestion]) {
+// override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
+// }
+// )
+
+ val cache = new java.util.concurrent.ConcurrentHashMap[String, Seq[Suggestion]] ()
+
+ val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, java.util.List[Suggestion]] ()
+
+ for ((elemId, sens) <- allSens; sen <- sens) {
+ NCUtils.asFuture(
+ () ⇒ {
+ val senSuggs: Seq[Suggestion] = cache.computeIfAbsent(
+ sen,
+ new Function[String, Seq[Suggestion]]() {
+ override def apply(v1: String): Seq[Suggestion] = ask(client, sen)
+ }
+ )
+
+ val elemSugs: util.List[Suggestion] = allSuggs.computeIfAbsent(
+ elemId,
+ new Function[String, util.List[Suggestion]]() {
+ override def apply(v1: String): util.List[Suggestion] = new CopyOnWriteArrayList[Suggestion]()
+ }
+ )
+
+ elemSugs.addAll(senSuggs)
+ },
+ (t: Throwable) ⇒ (),
+ (t: Throwable) ⇒ ()
+ )
+ }
+
val allSynsStems = elemSyns.flatMap(_._2).flatten.map(_.stem).toSet
val table = NCAsciiTable()
table #= ("Element", "Suggestions")
- allSuggs.foreach { case (elemId, elemSuggs) ⇒
+ allSuggs.asScala.map { case (id, elemSuggs) ⇒ id → elemSuggs.asScala}.foreach { case (elemId, elemSuggs) ⇒
elemSuggs.
map(sugg ⇒ (sugg, toStem(sugg.word))).
groupBy { case (_, stem) ⇒ stem }.
[incubator-nlpcraft] 02/02: WIP.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 8fac9ccfc8a81987a36c8a0689b2603c320eff1e
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 12:42:30 2020 +0300
WIP.
---
.../model/tools/synonyms/NCSynonymsGenerator.scala | 45 +++++++---------------
1 file changed, 13 insertions(+), 32 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index a94b11d..e1d8a9b 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -17,8 +17,7 @@
package org.apache.nlpcraft.model.tools.synonyms
import java.lang.reflect.Type
-import java.util
-import java.util.concurrent.CopyOnWriteArrayList
+import java.util.concurrent.{CopyOnWriteArrayList, CountDownLatch, TimeUnit}
import com.google.gson.Gson
import com.google.gson.reflect.TypeToken
@@ -34,8 +33,8 @@ import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
import org.apache.nlpcraft.common.util.NCUtils
import org.apache.nlpcraft.model.NCModelFileAdapter
-import scala.collection._
import scala.collection.JavaConverters._
+import scala.collection._
case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double) {
// TODO: all string fields
@@ -77,7 +76,6 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
- // TODO: multithreading.
private def ask(client: CloseableHttpClient, sen: String): Seq[Suggestion] = {
val post = new HttpPost(url)
@@ -105,7 +103,7 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
require(
word.forall(ch ⇒
ch.isLetterOrDigit ||
- ch == ''' ||
+ ch == '\'' ||
SEPARATORS.contains(ch)
),
s"Unsupported symbols: $word"
@@ -151,39 +149,22 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
elemId → hs
}.filter(_._2.nonEmpty)
-// val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
-// new (String ⇒ Seq[Suggestion]) {
-// override def apply(sen: String): Seq[Suggestion] = ask(client, sen).filter(_.score.toDouble >= minFactor)
-// }
-// )
-
val cache = new java.util.concurrent.ConcurrentHashMap[String, Seq[Suggestion]] ()
-
val allSuggs = new java.util.concurrent.ConcurrentHashMap[String, java.util.List[Suggestion]] ()
- for ((elemId, sens) <- allSens; sen <- sens) {
- NCUtils.asFuture(
- () ⇒ {
- val senSuggs: Seq[Suggestion] = cache.computeIfAbsent(
- sen,
- new Function[String, Seq[Suggestion]]() {
- override def apply(v1: String): Seq[Suggestion] = ask(client, sen)
- }
- )
+ val cdl = new CountDownLatch(allSens.map { case (_, seq) ⇒ seq.size }.sum)
- val elemSugs: util.List[Suggestion] = allSuggs.computeIfAbsent(
- elemId,
- new Function[String, util.List[Suggestion]]() {
- override def apply(v1: String): util.List[Suggestion] = new CopyOnWriteArrayList[Suggestion]()
- }
- )
-
- elemSugs.addAll(senSuggs)
+ for ((elemId, sens) ← allSens; sen ← sens)
+ NCUtils.asFuture(
+ _ ⇒ {
+ allSuggs.computeIfAbsent(elemId, (_: String) ⇒ new CopyOnWriteArrayList[Suggestion]()).
+ addAll(cache.computeIfAbsent(sen, (_: String) ⇒ ask(client, sen)).asJava)
},
- (t: Throwable) ⇒ (),
- (t: Throwable) ⇒ ()
+ (_: Throwable) ⇒ cdl.countDown(),
+ (_: Boolean) ⇒ cdl.countDown()
)
- }
+
+ cdl.await(Long.MaxValue, TimeUnit.MILLISECONDS)
val allSynsStems = elemSyns.flatMap(_._2).flatten.map(_.stem).toSet