You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/03/17 09:12:07 UTC

[incubator-nlpcraft] branch master updated: Stems lowercase usage fixed.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d16b07  Stems lowercase usage fixed.
9d16b07 is described below

commit 9d16b07d1dfded1895992e8eb555a4c9d61b1f78
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Mar 17 12:11:57 2022 +0300

    Stems lowercase usage fixed.
---
 .../src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java    | 2 +-
 .../nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala  | 4 ++--
 .../nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala | 4 ++--
 .../src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala     | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 991ddb9..2efb746 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -58,7 +58,7 @@ public class NCModelPipelineBuilder {
 
             @Override
             public synchronized String stem(String txt) {
-                return ps.stem(txt.toLowerCase()); // TODO:
+                return ps.stem(txt);
             }
         };
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
index 3bbdc48..f960c9d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
@@ -211,7 +211,7 @@ class NCSemanticEntityParserImpl(
         val toks = toksList.asScala.toSeq
         if toks.exists(_.get[String]("stopword") == null) then warnMissedProperty("stopword")
 
-        val stems = toks.map(p => p -> stemmer.stem(p.getText)).toMap
+        val stems = toks.map(p => p -> stemmer.stem(p.getText.toLowerCase)).toMap
         val stems4Lemms =
             var ok = true
             val seq =
@@ -221,7 +221,7 @@ class NCSemanticEntityParserImpl(
                         t -> lemma
 
             if ok then
-                seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma) }
+                seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma.toLowerCase) }
             else
                 warnMissedProperty("lemma")
                 Map.empty
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
index 3f8bdc5..f047b42 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
@@ -197,7 +197,7 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
                             else
                                 regex.used = true
                                 Some(regex.mkChunk())
-                        case None => Option(NCSemanticSynonymChunk(TEXT, tok.getText, stemmer.stem(tok.getText)))
+                        case None => Option(NCSemanticSynonymChunk(TEXT, tok.getText, stemmer.stem(tok.getText.toLowerCase)))
                 ).toSeq
             }).toSeq
 
@@ -236,7 +236,7 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
 
             def add(syns: Seq[NCSemanticSynonym]): Unit = buf ++= syns.map(Holder(_, elemId))
             def addSpec(txt: String, value: String = null): Unit =
-                buf += Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, stemmer.stem(txt))), value), elemId)
+                buf += Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, stemmer.stem(txt.toLowerCase))), value), elemId)
 
             addSpec(elemId)
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index b73af1d..ec74cc2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -129,7 +129,7 @@ object NCTestUtils:
     private def mkSemanticStemmer: NCSemanticStemmer =
         new NCSemanticStemmer():
             private val ps = new PorterStemmer
-            override def stem(txt: String): String = ps.synchronized { ps.stem(txt.toLowerCase) }
+            override def stem(txt: String): String = ps.synchronized { ps.stem(txt) }
 
 
     /**