You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/03/17 09:12:07 UTC
[incubator-nlpcraft] branch master updated: Stems lowercase usage fixed.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 9d16b07 Stems lowercase usage fixed.
9d16b07 is described below
commit 9d16b07d1dfded1895992e8eb555a4c9d61b1f78
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Mar 17 12:11:57 2022 +0300
Stems lowercase usage fixed.
---
.../src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java | 2 +-
.../nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala | 4 ++--
.../nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala | 4 ++--
.../src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 991ddb9..2efb746 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -58,7 +58,7 @@ public class NCModelPipelineBuilder {
@Override
public synchronized String stem(String txt) {
- return ps.stem(txt.toLowerCase()); // TODO:
+ return ps.stem(txt);
}
};
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
index 3bbdc48..f960c9d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticEntityParserImpl.scala
@@ -211,7 +211,7 @@ class NCSemanticEntityParserImpl(
val toks = toksList.asScala.toSeq
if toks.exists(_.get[String]("stopword") == null) then warnMissedProperty("stopword")
- val stems = toks.map(p => p -> stemmer.stem(p.getText)).toMap
+ val stems = toks.map(p => p -> stemmer.stem(p.getText.toLowerCase)).toMap
val stems4Lemms =
var ok = true
val seq =
@@ -221,7 +221,7 @@ class NCSemanticEntityParserImpl(
t -> lemma
if ok then
- seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma) }
+ seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma.toLowerCase) }
else
warnMissedProperty("lemma")
Map.empty
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
index 3f8bdc5..f047b42 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/impl/semantic/NCSemanticSynonymsProcessor.scala
@@ -197,7 +197,7 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
else
regex.used = true
Some(regex.mkChunk())
- case None => Option(NCSemanticSynonymChunk(TEXT, tok.getText, stemmer.stem(tok.getText)))
+ case None => Option(NCSemanticSynonymChunk(TEXT, tok.getText, stemmer.stem(tok.getText.toLowerCase)))
).toSeq
}).toSeq
@@ -236,7 +236,7 @@ private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
def add(syns: Seq[NCSemanticSynonym]): Unit = buf ++= syns.map(Holder(_, elemId))
def addSpec(txt: String, value: String = null): Unit =
- buf += Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, stemmer.stem(txt))), value), elemId)
+ buf += Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, stemmer.stem(txt.toLowerCase))), value), elemId)
addSpec(elemId)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index b73af1d..ec74cc2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -129,7 +129,7 @@ object NCTestUtils:
private def mkSemanticStemmer: NCSemanticStemmer =
new NCSemanticStemmer():
private val ps = new PorterStemmer
- override def stem(txt: String): String = ps.synchronized { ps.stem(txt.toLowerCase) }
+ override def stem(txt: String): String = ps.synchronized { ps.stem(txt) }
/**