You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/12/12 13:38:02 UTC

[incubator-nlpcraft] branch NLPCRAFT-520 updated: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 2205ae2f WIP.
2205ae2f is described below

commit 2205ae2f691a33f69f5f6fd572e2418435f9e0d4
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Dec 12 17:38:08 2022 +0400

    WIP.
---
 .../components/PizzeriaModelPipeline.scala         |  8 ++----
 .../org/apache/nlpcraft/NCPipelineBuilder.scala    | 14 +++-------
 .../apache/nlpcraft/nlp/common/NCEnStemmer.scala   | 32 ++++++++++++++++++++++
 .../nlp/enrichers/NCDictionaryTokenEnricher.scala  |  2 +-
 .../nlp/enrichers/NCEnStopWordsTokenEnricher.scala | 20 ++++++++++----
 .../nlp/enrichers/NCOpenNLPTokenEnricher.scala     |  4 +--
 .../nlp/enrichers/NCQuotesTokenEnricher.scala      |  3 +-
 .../nlp/enrichers/NCSwearWordsTokenEnricher.scala  |  3 +-
 .../nlpcraft/nlp/parsers/NCNLPEntityParser.scala   |  7 +++--
 .../nlp/parsers/NCOpenNLPEntityParser.scala        |  5 ++--
 .../nlp/parsers/NCOpenNLPTokenParser.scala         |  5 ++--
 .../nlpcraft/nlp/parsers/NCSemanticElement.scala   |  6 ++--
 .../nlp/parsers/NCSemanticEntityParser.scala       |  3 +-
 .../enrichers/NCSwearWordsTokenEnricherSpec.scala  |  8 ++----
 .../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 18 +++---------
 15 files changed, 80 insertions(+), 58 deletions(-)

diff --git a/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala b/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
index e21066a7..655072d6 100644
--- a/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
+++ b/nlpcraft-examples/pizzeria/src/main/scala/org/apache/nlpcraft/examples/pizzeria/components/PizzeriaModelPipeline.scala
@@ -1,12 +1,11 @@
 package org.apache.nlpcraft.examples.pizzeria.components
 
 import edu.stanford.nlp.pipeline.StanfordCoreNLP
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.entity.parser.stanford.NCStanfordNLPEntityParser
 import org.apache.nlpcraft.nlp.token.parser.stanford.NCStanfordNLPTokenParser
 import org.apache.nlpcraft.*
-import org.apache.nlpcraft.nlp.common.NCStemmer
+import org.apache.nlpcraft.nlp.common.{NCEnStemmer, NCStemmer}
 import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher
 import org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser
 
@@ -21,9 +20,6 @@ object PizzeriaModelPipeline:
             props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner")
             new StanfordCoreNLP(props)
         val tokParser = new NCStanfordNLPTokenParser(stanford)
-        val stemmer = new NCStemmer():
-            private val ps = new PorterStemmer
-            override def stem(word: String): String = ps.synchronized { ps.stem(word) }
 
         import PizzeriaOrderMapperDesc as D
 
@@ -31,7 +27,7 @@ object PizzeriaModelPipeline:
             withTokenParser(tokParser).
             withTokenEnricher(new NCEnStopWordsTokenEnricher()).
             withEntityParser(new NCStanfordNLPEntityParser(stanford, Set("number"))).
-            withEntityParser(NCSemanticEntityParser(stemmer, tokParser, "pizzeria_model.yaml")).
+            withEntityParser(NCSemanticEntityParser(new NCEnStemmer, tokParser, "pizzeria_model.yaml")).
             withEntityMapper(PizzeriaOrderMapper(extra = D("ord:pizza:size", "ord:pizza:size:value"), dests = D("ord:pizza", "ord:pizza:size"))).
             withEntityMapper(PizzeriaOrderMapper(extra = D("stanford:number", "stanford:number:nne"), dests = D("ord:pizza", "ord:pizza:qty"), D("ord:drink", "ord:drink:qty"))).
             withEntityValidator(new PizzeriaOrderValidator()).
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
index 08672995..6452cabc 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
@@ -17,9 +17,8 @@
 
 package org.apache.nlpcraft
 
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.common.NCStemmer
+import org.apache.nlpcraft.nlp.common.{NCEnStemmer, NCStemmer}
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.enrichers.*
 
@@ -40,11 +39,6 @@ class NCPipelineBuilder:
     private val entMappers: Buf[NCEntityMapper] = Buf.empty
     private val varFilters: Buf[NCVariantFilter] = Buf.empty
 
-    private def mkEnStemmer: NCStemmer =
-        new NCStemmer:
-            final private val ps: PorterStemmer = new PorterStemmer
-            override def stem(word: String): String = ps.stem(word)
-
     private def mkEnOpenNLPTokenParser: NCOpenNLPTokenParser =
         new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))
 
@@ -222,7 +216,7 @@ class NCPipelineBuilder:
         tokEnrichers += new NCEnStopWordsTokenEnricher
         tokEnrichers += new NCSwearWordsTokenEnricher(
             NCResourceReader.getPath("badfilter/swear_words.txt"),
-            mkEnStemmer
+            new NCEnStemmer
         )
         tokEnrichers += new NCQuotesTokenEnricher
         tokEnrichers += new NCDictionaryTokenEnricher("moby/354984si.ngl")
@@ -266,7 +260,7 @@ class NCPipelineBuilder:
         lang.toUpperCase match
             case "EN" =>
                 setEnComponents()
-                entParsers += NCSemanticEntityParser(mkEnStemmer, mkEnOpenNLPTokenParser, macros, elms)
+                entParsers += NCSemanticEntityParser(new NCEnStemmer, mkEnOpenNLPTokenParser, macros, elms)
             case _ => require(false, s"Unsupported language: $lang")
         this
 
@@ -332,7 +326,7 @@ class NCPipelineBuilder:
         lang.toUpperCase match
             case "EN" =>
                 setEnComponents()
-                this.entParsers += NCSemanticEntityParser(mkEnStemmer, mkEnOpenNLPTokenParser, mdlSrc)
+                this.entParsers += NCSemanticEntityParser(new NCEnStemmer, mkEnOpenNLPTokenParser, mdlSrc)
             case _ => require(false, s"Unsupported language: $lang")
         this
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCEnStemmer.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCEnStemmer.scala
new file mode 100644
index 00000000..7e098e76
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/common/NCEnStemmer.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.common
+
+import opennlp.tools.stemmer.PorterStemmer
+import org.apache.nlpcraft.nlp.parsers.*
+
+/**
+  * English language [[NCStemmer]] implementation, based on
+  * [[https://opennlp.apache.org/ OpenNLP]] Porter Stemmer.
+  * See [[https://tartarus.org/martin/PorterStemmer here]] for more details.
+  */
+class NCEnStemmer extends NCStemmer:
+    private val stemmer = new PorterStemmer
+
+    /** @inheritdoc */
+    def stem(word: String): String = stemmer.stem(word)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
index a394da37..cf17817e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
@@ -42,7 +42,7 @@ class NCDictionaryTokenEnricher(dictRes: String) extends NCTokenEnricher:
     init()
 
     private def init(): Unit = dict = NCUtils.readResource(dictRes).toSet
-    private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw new NCException("'lemma'' property not found in token."))
+    private def getLemma(t: NCToken): String = t.get("lemma").getOrElse(throw new NCException("Lemma not found in token."))
 
     /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index 8de3f03d..cfd383ef 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -18,9 +18,9 @@
 package org.apache.nlpcraft.nlp.enrichers
 
 import com.typesafe.scalalogging.LazyLogging
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.NCUtils
+import org.apache.nlpcraft.nlp.common.{NCEnStemmer, NCStemmer}
 
 import java.io.*
 import java.util
@@ -177,12 +177,19 @@ import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher.*
   * contain token's lemma and part of speech. You can configure [[NCOpenNLPTokenEnricher]] for English language
   * that provides this metadata properties before this enricher in your [[NCPipeline pipeline]].
   *
+  * @see [[NCEnStemmer]]
+  *
   * @param addStopsSet User defined collection of additional stop-words.
+  *  These words are matched using the `stemmer` implementation.
   * @param exclStopsSet User defined collection of exceptions, that is words which should not be marked as stop-words during processing.
+  *  These words are matched using the `stemmer` implementation.
+  * @param stemmer English stemmer implementation.
   */
-class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty, exclStopsSet: Set[String] = Set.empty) extends NCTokenEnricher with LazyLogging:
-    private final val stemmer = new PorterStemmer
-
+class NCEnStopWordsTokenEnricher(
+    addStopsSet: Set[String] = Set.empty,
+    exclStopsSet: Set[String] = Set.empty,
+    stemmer: NCStemmer = new NCEnStemmer
+) extends NCTokenEnricher with LazyLogging:
     private var addStems: Set[String] = _
     private var exclStems: Set[String] = _
     private var percents: Set[String] = _
@@ -324,7 +331,7 @@ class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty, exclStops
       * Parses configuration template.
       *
       * @param lines Configuration file content.
-      * @return Holder and `is-exception` flag.
+      * @return Holder and is-exception flag.
       */
     private def readStopWords(lines: Seq[String]): Map[Boolean, StopWordHolder] =
         // 1. Prepares accumulation data structure.
@@ -461,7 +468,7 @@ class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty, exclStops
       * @param ns Sentence.
       * @param stopPoses Stop POSes.
       * @param lastIdx Last index.
-      * @param isException Function which return `stop word exception` flag.
+      * @param isException Function which returns the stop word exception flag.
       * @param stops Stopwords tokens.
       */
     @tailrec
@@ -517,6 +524,7 @@ class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty, exclStops
 
         processCommonStops0(ns)
 
+    /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
         // Stop words and exceptions caches for this sentence.
         val cacheSw = mutable.HashMap.empty[Seq[NCToken], Boolean]
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index 7ba30164..270ca60a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.nlp.enrichers
 import com.typesafe.scalalogging.LazyLogging
 import opennlp.tools.lemmatizer.DictionaryLemmatizer
 import opennlp.tools.postag.*
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.*
 
@@ -28,7 +27,7 @@ import java.io.*
 import scala.concurrent.ExecutionContext
 
 /**
-  * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher enricher]].
+  * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] property to the [[NCToken token]]
   * instance.
@@ -66,6 +65,7 @@ class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = nul
             )
         )(ExecutionContext.Implicits.global)
 
+    /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
         val txts = toks.map(_.getText).toArray
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
index 6f82ca76..695c27c7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
@@ -21,7 +21,7 @@ import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.*
 
 /**
-  * Quotes [[NCTokenEnricher enricher]].
+  * Quotes [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `quoted` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
   * instance if word it represents is in quotes. The value `true` of the metadata property indicates that this word is in quotes,
@@ -38,6 +38,7 @@ class NCQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
     private def isQuote(t: NCToken): Boolean = Q_POS.contains(getPos(t))
 
     //noinspection DuplicatedCode
+    /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
         val quotes = toks.filter(isQuote)
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index f0d282c7..98ca9113 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -26,7 +26,7 @@ import java.io.*
 import java.util.Objects
 
 /**
-  * "Swear-word" [[NCTokenEnricher enricher]].
+  * "Swear-word" [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `swear` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
   * instance if word it represents is a swear word dictionary, i.e. the swear dictionary contains this word's
@@ -52,6 +52,7 @@ class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends NCT
             map(p => stemmer.stem(p.toLowerCase)).toSet
         logger.trace(s"Loaded resource: $dictRes")
 
+    /** @inheritdoc */
     override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
         toks.foreach(t => t.put("swear", swearWords.contains(stemmer.stem(t.getText.toLowerCase))))
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index b84d3c18..cc624432 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -31,9 +31,9 @@ object NCNLPEntityParser:
 import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
 
 /**
-  *  NLP data [[NCEntityParser parser]].
+  *  NLP data [[NCEntityParser entity parser]].
   *
-  * This parser converts list of input [[NCToken]] instances to list of [[NCEntity]] instances with ID `nlp:entity`.
+  * This parser converts list of input [[NCToken]] instances to list of [[NCEntity]] instances with ID **nlp:entity**.
   * All [[NCEntity]] instances contain following mandatory [[NCPropertyMap metadata]] properties:
   *  - nlp:entity:text
   *  - nlp:entity:index
@@ -42,12 +42,13 @@ import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
   *
   *  Also created [[NCEntity]] instances receive all another [[NCPropertyMap metadata]] properties
   *  which were added by configured in [[NCPipeline pipeline]] token [[org.apache.nlpcraft.NCTokenEnricher enrichers]].
-  *  These properties identifiers will be prefixed by `nlp:entity:`.
+  *  These properties identifiers will be prefixed by **nlp:entity:**.
   *
   *  @param predicate Predicate which allows to filter list of converted [[NCToken]] instances.
   *  By default all [[NCToken]] instances converted.
   */
 class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends NCEntityParser:
+    /** @inheritdoc */
     override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
         toks.filter(predicate).map(t =>
             new NCPropertyMapAdapter with NCEntity:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index 7613e237..a9244535 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -47,8 +47,8 @@ object NCOpenNLPEntityParser:
         new NCOpenNLPEntityParser(List(mdl))
 
 /**
-  *  [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCEntityParser parser]] configured by
-  * paths to [[https://opennlp.apache.org/ OpenNLP]] `name finders` models.
+  *  [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCEntityParser entity parser]] configured by
+  * paths to [[https://opennlp.apache.org/ OpenNLP]] **name finders** models.
   *
   * This parser prepares [[NCEntity]] instances which are detected by given models.
   * These entities are created with ID `opennlp:modelId`, where `modelId` is [[https://opennlp.apache.org/ OpenNLP]] model ID.
@@ -92,6 +92,7 @@ class NCOpenNLPEntityParser(findersMdlsRes: List[String]) extends NCEntityParser
         finally finder.clearAdaptiveData()
     }
 
+    /** @inheritdoc */
     override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
         val txtArr = toks.map(_.getText).toArray
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
index 82c4b120..a148b3bb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
@@ -27,8 +27,8 @@ import java.util
 import java.util.Objects
 
 /**
-  *  [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenParser parser]] configured
-  *  by path to [[https://opennlp.apache.org/ OpenNLP]] `tokenizers` model.
+  *  [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenParser token parser]] configured
+  *  by path to [[https://opennlp.apache.org/ OpenNLP]] **tokenizers** model.
   *
   * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
   *
@@ -46,6 +46,7 @@ class NCOpenNLPTokenParser(tokMdlRes: String) extends NCTokenParser with LazyLog
 
         logger.trace(s"Loaded resource: $tokMdlRes")
 
+    /** @inheritdoc */
     override def tokenize(text: String): List[NCToken] =
         this.synchronized {
             tokenizer.tokenizePos(text).zipWithIndex.map { (p, idx) =>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index e8d43aa1..dd157cc9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.nlp.common.NCStemmer
 /**
   *
   * Configuration element which helps to detect [[org.apache.nlpcraft.NCEntity NCEntity]] for
-  * `Semantic` implementation of [[org.apache.nlpcraft.NCEntityParser NCEntityParser]].
+  * **Semantic** implementation of [[org.apache.nlpcraft.NCEntityParser NCEntityParser]].
   *
   * See detailed description on the website [[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic Semantic Parser]].
   *
@@ -30,14 +30,14 @@ import org.apache.nlpcraft.nlp.common.NCStemmer
   */
 trait NCSemanticElement:
     /**
-      * Gets `id` for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
+      * Gets **id** for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
       * Representation of [[org.apache.nlpcraft.NCEntity.getId NCEntity.getId()]] method.
       * @return Element ID.
       */
     def getId: String
 
     /**
-      * Gets `groups` for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
+      * Gets **groups** for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
       * Representation of [[org.apache.nlpcraft.NCEntity.getGroups NCEntity.getGroups()]] method.
       * @return Groups.
       */
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index e96a257e..76e5fd83 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -178,7 +178,7 @@ object NCSemanticEntityParser:
 import org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser.*
 
 /**
-  * `Semantic` [[NCEntityParser parser]] implementation.
+  * **Semantic** [[NCEntityParser entity parser]] implementation.
   *
   * See detailed description on the website [[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic Semantic Parser]].
   *
@@ -233,6 +233,7 @@ class NCSemanticEntityParser(
       */
     private def warnMissedProperty(name: String): Unit = logger.warn(s"'$name' property not found. Is proper token enricher configured?")
 
+    /** @inheritdoc */
     override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
         if toks.exists(_.get[String]("stopword").isEmpty) then warnMissedProperty("stopword")
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
index c48ef94b..78d8b5e4 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricherSpec.scala
@@ -17,9 +17,8 @@
 
 package org.apache.nlpcraft.nlp.enrichers
 
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.common.NCStemmer
+import org.apache.nlpcraft.nlp.common.{NCEnStemmer, NCStemmer}
 import org.apache.nlpcraft.nlp.enrichers.NCSwearWordsTokenEnricher
 import org.apache.nlpcraft.nlp.enrichers.*
 import org.apache.nlpcraft.nlp.util.*
@@ -29,10 +28,7 @@ import org.scalatest.funsuite.AnyFunSuite
   */
 class NCSwearWordsTokenEnricherSpec extends AnyFunSuite:
     private val swEnricher = new NCSwearWordsTokenEnricher(
-        NCResourceReader.getPath("badfilter/swear_words.txt"),
-        new NCStemmer:
-            final private val ps: PorterStemmer = new PorterStemmer
-            override def stem(word: String): String = ps.stem(word)
+        NCResourceReader.getPath("badfilter/swear_words.txt"), new NCEnStemmer
     )
 
     test("test") {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index 2ea44e91..cdc51d87 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -17,11 +17,10 @@
 
 package org.apache.nlpcraft.nlp.util
 
-import opennlp.tools.stemmer.PorterStemmer
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.ascii.NCAsciiTable
 import org.apache.nlpcraft.internal.util.NCResourceReader
-import org.apache.nlpcraft.nlp.common.NCStemmer
+import org.apache.nlpcraft.nlp.common.{NCEnStemmer, NCStemmer}
 import org.apache.nlpcraft.nlp.parsers.*
 import org.apache.nlpcraft.nlp.parsers
 import org.apache.nlpcraft.nlp.parsers.{NCOpenNLPTokenParser, NCSemanticElement, NCSemanticEntityParser}
@@ -120,33 +119,24 @@ object NCTestUtils:
                 catch case e: Exception => println(s"Expected error: ${e.getMessage}")
         }
 
-    /**
-      *
-      */
-    private def mkSemanticStemmer: NCStemmer =
-        new NCStemmer():
-            private val ps = new PorterStemmer
-            override def stem(word: String): String = ps.synchronized { ps.stem(word) }
-
-
     /**
       *
       * @param elms
       * @param macros
       */
     def mkEnSemanticParser(elms: List[NCSemanticElement], macros: Map[String, String] = Map.empty): NCSemanticEntityParser =
-        parsers.NCSemanticEntityParser(mkSemanticStemmer, EN_TOK_PARSER, macros, elms)
+        parsers.NCSemanticEntityParser(new NCEnStemmer, EN_TOK_PARSER, macros, elms)
 
     /**
       *
       * @param elms
       */
     def mkEnSemanticParser(elms: NCSemanticElement*): NCSemanticEntityParser =
-        parsers.NCSemanticEntityParser(mkSemanticStemmer, EN_TOK_PARSER, elms.toList)
+        parsers.NCSemanticEntityParser(new NCEnStemmer, EN_TOK_PARSER, elms.toList)
 
     /**
       *
       * @param mdlSrc
       */
     def mkEnSemanticParser(mdlSrc: String): NCSemanticEntityParser =
-        parsers.NCSemanticEntityParser(mkSemanticStemmer, EN_TOK_PARSER, mdlSrc)
\ No newline at end of file
+        parsers.NCSemanticEntityParser(new NCEnStemmer, EN_TOK_PARSER, mdlSrc)
\ No newline at end of file