You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/26 08:50:06 UTC
[incubator-nlpcraft] branch NLPCRAFT-41 updated (840ee68 -> 9897281)
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.
from 840ee68 WIP.
new 6d7d0e2 WIP.
add 15ea31d Fix for NLPCRAFT-39.
new 9897281 Merge branch 'master' into NLPCRAFT-41
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
pom.xml | 36 ++++++++++----------
.../org/apache/nlpcraft/common/util/NCUtils.scala | 21 ++++++------
.../model/intent/impl/NCIntentDslCompiler.scala | 2 +-
.../model/intent/impl/NCIntentSolverEngine.scala | 7 ++--
.../model/tools/synonyms/NCSynonymsGenerator.scala | 39 +++++++++++-----------
.../probe/mgrs/deploy/NCDeployManager.scala | 1 +
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 2 +-
.../mgrs/model/NCModelSynonymDslCompiler.scala | 32 +++++++++---------
.../server/geo/tools/NCGeoNamesGenerator.scala | 8 ++---
.../geo/tools/NCGeoStateNamesGenerator.scala | 2 +-
.../geo/tools/NCGeoSyntheticNamesGenerator.scala | 4 +--
.../geo/tools/metro/NCGeoMetroGenerator.scala | 4 +--
.../org/apache/nlpcraft/server/json/NCJson.scala | 8 ++---
.../nlp/core/stanford/NCStanfordAnnotator.scala | 2 +-
.../enrichers/stopword/NCStopWordEnricher.scala | 2 +-
.../nlpcraft/server/probe/NCProbeManager.scala | 13 +++++---
.../nlpcraft/server/query/NCQueryManager.scala | 15 +++++----
.../nlpcraft/server/rest/NCRestManager.scala | 12 +++----
18 files changed, 104 insertions(+), 106 deletions(-)
[incubator-nlpcraft] 02/02: Merge branch 'master' into NLPCRAFT-41
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 9897281117e3ee7a9335abee3c0c8472f8c293eb
Merge: 6d7d0e2 15ea31d
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 11:49:48 2020 +0300
Merge branch 'master' into NLPCRAFT-41
pom.xml | 36 ++++++++++------------
.../org/apache/nlpcraft/common/util/NCUtils.scala | 21 ++++++-------
.../model/intent/impl/NCIntentDslCompiler.scala | 2 +-
.../model/intent/impl/NCIntentSolverEngine.scala | 7 ++---
.../probe/mgrs/deploy/NCDeployManager.scala | 1 +
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 2 +-
.../mgrs/model/NCModelSynonymDslCompiler.scala | 32 +++++++++----------
.../server/geo/tools/NCGeoNamesGenerator.scala | 8 ++---
.../geo/tools/NCGeoStateNamesGenerator.scala | 2 +-
.../geo/tools/NCGeoSyntheticNamesGenerator.scala | 4 +--
.../geo/tools/metro/NCGeoMetroGenerator.scala | 4 +--
.../org/apache/nlpcraft/server/json/NCJson.scala | 8 ++---
.../nlp/core/stanford/NCStanfordAnnotator.scala | 2 +-
.../enrichers/stopword/NCStopWordEnricher.scala | 2 +-
.../nlpcraft/server/probe/NCProbeManager.scala | 13 +++++---
.../nlpcraft/server/query/NCQueryManager.scala | 15 +++++----
.../nlpcraft/server/rest/NCRestManager.scala | 12 +++-----
17 files changed, 85 insertions(+), 86 deletions(-)
[incubator-nlpcraft] 01/02: WIP.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 6d7d0e2169cf9a96ddac3cb7a7a566d199ac3a8c
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 11:44:54 2020 +0300
WIP.
---
.../model/tools/synonyms/NCSynonymsGenerator.scala | 39 +++++++++++-----------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index a0ff611..20b0f18 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -72,6 +72,7 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty)
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
+ private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
// TODO: multithreading.
private def ask(client: CloseableHttpClient, sen: String): Seq[Suggestion] = {
@@ -96,23 +97,28 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
val client = HttpClients.createDefault
- case class Word(word: String) {
+ case class Word(word: String, stem: String) {
require(!word.contains(" "), s"Word cannot contains spaces: $word")
- require(word.forall(ch ⇒ ch.isLetterOrDigit || ch == ''' || SEPARATORS.contains(ch)), s"Unsupported symbols: $word")
-
- val stem: String = NCNlpPorterStemmer.stem(word)
+ require(
+ word.forall(ch ⇒
+ ch.isLetterOrDigit ||
+ ch == ''' ||
+ SEPARATORS.contains(ch)
+ ),
+ s"Unsupported symbols: $word"
+ )
}
val examples =
mdl.getExamples.asScala.
map(s ⇒ SEPARATORS.foldLeft(s)((s, ch) ⇒ s.replaceAll(s"\\$ch", s" $ch "))).
map(split).
- map(_.map(Word)).
+ map(_.map(p ⇒ Word(p, toStemWord(p)))).
toSeq
val elemSyns =
mdl.getElements.asScala.map(e ⇒ e.getId → e.getSynonyms.asScala.flatMap(parser.expand)).
- map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(Word))}.toMap
+ map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(p ⇒ Word(p, toStemWord(p))))}.toMap
val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
new (String ⇒ Seq[Suggestion]) {
@@ -123,26 +129,19 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
val allSuggs =
elemSyns.map {
case (elemId, elemSyns) ⇒
- val stemsSyns: Seq[(String, String)] =
- elemSyns.filter(_.size == 1).map(words ⇒ words.head.stem → words.head.word)
+ val elemSingleSyns = elemSyns.filter(_.size == 1).map(_.head)
+ val elemStems = elemSingleSyns.map(_.stem)
val hs: Seq[Suggestion] =
- examples.flatMap(exWords ⇒ {
- val exStems = exWords.map(_.stem)
-
- val idxs =
- exStems.flatMap(stem ⇒
- stemsSyns.find(_._1 == stem) match {
- case Some(p) ⇒ Some(exStems.indexOf(p._1))
- case None ⇒ None
- }
- )
+ examples.flatMap(example ⇒ {
+ val exStems = example.map(_.stem)
+ val idxs = exStems.flatMap(s ⇒ if (elemStems.contains(s)) Some(exStems.indexOf(s)) else None)
if (idxs.nonEmpty)
- stemsSyns.map(_._2).flatMap(syn ⇒
+ elemSingleSyns.map(_.word).flatMap(syn ⇒
idxs.flatMap(idx ⇒
cache(
- exWords.
+ example.
zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
mkString(" "))