You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/26 08:50:06 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated (840ee68 -> 9897281)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 840ee68  WIP.
     new 6d7d0e2  WIP.
     add 15ea31d  Fix for NLPCRAFT-39.
     new 9897281  Merge branch 'master' into NLPCRAFT-41

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 pom.xml                                            | 36 ++++++++++----------
 .../org/apache/nlpcraft/common/util/NCUtils.scala  | 21 ++++++------
 .../model/intent/impl/NCIntentDslCompiler.scala    |  2 +-
 .../model/intent/impl/NCIntentSolverEngine.scala   |  7 ++--
 .../model/tools/synonyms/NCSynonymsGenerator.scala | 39 +++++++++++-----------
 .../probe/mgrs/deploy/NCDeployManager.scala        |  1 +
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  2 +-
 .../mgrs/model/NCModelSynonymDslCompiler.scala     | 32 +++++++++---------
 .../server/geo/tools/NCGeoNamesGenerator.scala     |  8 ++---
 .../geo/tools/NCGeoStateNamesGenerator.scala       |  2 +-
 .../geo/tools/NCGeoSyntheticNamesGenerator.scala   |  4 +--
 .../geo/tools/metro/NCGeoMetroGenerator.scala      |  4 +--
 .../org/apache/nlpcraft/server/json/NCJson.scala   |  8 ++---
 .../nlp/core/stanford/NCStanfordAnnotator.scala    |  2 +-
 .../enrichers/stopword/NCStopWordEnricher.scala    |  2 +-
 .../nlpcraft/server/probe/NCProbeManager.scala     | 13 +++++---
 .../nlpcraft/server/query/NCQueryManager.scala     | 15 +++++----
 .../nlpcraft/server/rest/NCRestManager.scala       | 12 +++----
 18 files changed, 104 insertions(+), 106 deletions(-)


[incubator-nlpcraft] 02/02: Merge branch 'master' into NLPCRAFT-41

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 9897281117e3ee7a9335abee3c0c8472f8c293eb
Merge: 6d7d0e2 15ea31d
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 11:49:48 2020 +0300

    Merge branch 'master' into NLPCRAFT-41

 pom.xml                                            | 36 ++++++++++------------
 .../org/apache/nlpcraft/common/util/NCUtils.scala  | 21 ++++++-------
 .../model/intent/impl/NCIntentDslCompiler.scala    |  2 +-
 .../model/intent/impl/NCIntentSolverEngine.scala   |  7 ++---
 .../probe/mgrs/deploy/NCDeployManager.scala        |  1 +
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |  2 +-
 .../mgrs/model/NCModelSynonymDslCompiler.scala     | 32 +++++++++----------
 .../server/geo/tools/NCGeoNamesGenerator.scala     |  8 ++---
 .../geo/tools/NCGeoStateNamesGenerator.scala       |  2 +-
 .../geo/tools/NCGeoSyntheticNamesGenerator.scala   |  4 +--
 .../geo/tools/metro/NCGeoMetroGenerator.scala      |  4 +--
 .../org/apache/nlpcraft/server/json/NCJson.scala   |  8 ++---
 .../nlp/core/stanford/NCStanfordAnnotator.scala    |  2 +-
 .../enrichers/stopword/NCStopWordEnricher.scala    |  2 +-
 .../nlpcraft/server/probe/NCProbeManager.scala     | 13 +++++---
 .../nlpcraft/server/query/NCQueryManager.scala     | 15 +++++----
 .../nlpcraft/server/rest/NCRestManager.scala       | 12 +++-----
 17 files changed, 85 insertions(+), 86 deletions(-)


[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 6d7d0e2169cf9a96ddac3cb7a7a566d199ac3a8c
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 26 11:44:54 2020 +0300

    WIP.
---
 .../model/tools/synonyms/NCSynonymsGenerator.scala | 39 +++++++++++-----------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
index a0ff611..20b0f18 100644
--- a/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/tools/synonyms/NCSynonymsGenerator.scala
@@ -72,6 +72,7 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
     private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty)
 
     private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
+    private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
 
     // TODO: multithreading.
     private def ask(client: CloseableHttpClient, sen: String): Seq[Suggestion] = {
@@ -96,23 +97,28 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
 
         val client = HttpClients.createDefault
 
-        case class Word(word: String) {
+        case class Word(word: String, stem: String) {
             require(!word.contains(" "), s"Word cannot contains spaces: $word")
-            require(word.forall(ch ⇒ ch.isLetterOrDigit || ch == ''' || SEPARATORS.contains(ch)), s"Unsupported symbols: $word")
-
-            val stem: String = NCNlpPorterStemmer.stem(word)
+            require(
+                word.forall(ch ⇒
+                    ch.isLetterOrDigit ||
+                    ch == ''' ||
+                    SEPARATORS.contains(ch)
+                ),
+                s"Unsupported symbols: $word"
+            )
         }
 
         val examples =
             mdl.getExamples.asScala.
                 map(s ⇒ SEPARATORS.foldLeft(s)((s, ch) ⇒ s.replaceAll(s"\\$ch", s" $ch "))).
                 map(split).
-                map(_.map(Word)).
+                map(_.map(p ⇒ Word(p, toStemWord(p)))).
                 toSeq
 
         val elemSyns =
             mdl.getElements.asScala.map(e ⇒ e.getId → e.getSynonyms.asScala.flatMap(parser.expand)).
-                map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(Word))}.toMap
+                map { case (id, seq) ⇒ id → seq.map(txt ⇒ split(txt).map(p ⇒ Word(p, toStemWord(p))))}.toMap
 
         val cache = mutable.HashMap.empty[String, Seq[Suggestion]].withDefault(
             new (String ⇒ Seq[Suggestion]) {
@@ -123,26 +129,19 @@ case class NCSynonymsGenerator(url: String, modelPath: String, minFactor: Double
         val allSuggs =
             elemSyns.map {
                 case (elemId, elemSyns) ⇒
-                    val stemsSyns: Seq[(String, String)] =
-                        elemSyns.filter(_.size == 1).map(words ⇒ words.head.stem → words.head.word)
+                    val elemSingleSyns = elemSyns.filter(_.size == 1).map(_.head)
+                    val elemStems = elemSingleSyns.map(_.stem)
 
                     val hs: Seq[Suggestion] =
-                        examples.flatMap(exWords ⇒ {
-                            val exStems = exWords.map(_.stem)
-
-                            val idxs =
-                                exStems.flatMap(stem ⇒
-                                    stemsSyns.find(_._1 == stem) match {
-                                        case Some(p) ⇒ Some(exStems.indexOf(p._1))
-                                        case None ⇒ None
-                                    }
-                                )
+                        examples.flatMap(example ⇒ {
+                            val exStems = example.map(_.stem)
+                            val idxs = exStems.flatMap(s ⇒ if (elemStems.contains(s)) Some(exStems.indexOf(s)) else None)
 
                             if (idxs.nonEmpty)
-                                stemsSyns.map(_._2).flatMap(syn ⇒
+                                elemSingleSyns.map(_.word).flatMap(syn ⇒
                                     idxs.flatMap(idx ⇒
                                         cache(
-                                            exWords.
+                                            example.
                                             zipWithIndex.map { case (w, i1) ⇒ if (idxs.contains(i1)) syn else w.word }.
                                             zipWithIndex.map { case (s, i2) ⇒ if (i2 == idx) s"$s#" else s}.
                                             mkString(" "))