You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/14 16:41:49 UTC

[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
     new 901187b  WIP.
901187b is described below

commit 901187b2711722c4dc6bc049373fa063b00f46c3
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Sep 14 19:41:39 2021 +0300

    WIP.
---
 nlpcraft/src/main/resources/stopwords/stop_words.txt  |  1 +
 .../nlp/enrichers/stopword/NCStopWordEnricher.scala   | 19 ++++++++++---------
 .../enrichers/model/NCEnricherNestedModelSpec3.scala  |  1 +
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/nlpcraft/src/main/resources/stopwords/stop_words.txt b/nlpcraft/src/main/resources/stopwords/stop_words.txt
index 3629397..d7b7409 100644
--- a/nlpcraft/src/main/resources/stopwords/stop_words.txt
+++ b/nlpcraft/src/main/resources/stopwords/stop_words.txt
@@ -63,6 +63,7 @@
 
 # POSES list exceptions.
 ~may
+~no
 
 # Postfixes list.
 *ent | ~NN ~NNS ~NNP ~NNPS
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 03e0ec9..1af22cb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -246,15 +246,6 @@ object NCStopWordEnricher extends NCProbeEnricher {
         if (del.nonEmpty) {
             del.foreach(t => require(t.isNlp))
 
-            // TODO:
-            logger.info(
-                s"Stopwords deleted from sentence [" +
-                s"srvReqId=${ns.srvReqId}, " +
-                s"text=${ns.text}, " +
-                s"stopWords=${del.map(p => s"${p.origText}(index=${p.wordIndexes.head})").mkString("|")}" +
-                s"]"
-            )
-
             val delIdxs = del.flatMap(_.wordIndexes).sorted
 
             val old = ns.tokens.clone()
@@ -273,6 +264,16 @@ object NCStopWordEnricher extends NCProbeEnricher {
                     t.add(n.clone(tokIdxs, wordIdxs))
                 })
             })
+
+            // TODO:
+            logger.info(
+                s"Stopwords deleted from sentence [" +
+                s"srvReqId=${ns.srvReqId}, " +
+                s"originText=${ns.text}, " +
+                s"fixedText=${ns.tokens.map(_.origText).mkString(" ")}, " +
+                s"stopWords=${del.map(p => s"${p.origText}(index=${p.wordIndexes.head})").mkString("|")}" +
+                s"]"
+            )
         }
     }
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
index 2303e30..0b10a61 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
@@ -36,6 +36,7 @@ class NCNestedTestModel3 extends NCModelAdapter("nlpcraft.nested3.test.mdl", "Ne
 
     override def getAbstractTokens: util.Set[String] = Set("e1").asJava
     override def getEnabledBuiltInTokens: util.Set[String] = Set.empty[String].asJava
+    override def getExcludedStopWords: util.Set[String] = Set("a").asJava
 
     @NCIntent("intent=onE2 term(t1)={# == 'e2'}[12, 100]")
     def onAB(): NCResult = NCResult.text("OK")