You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/12/07 12:31:14 UTC
[incubator-nlpcraft] branch master updated: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new f6fd158d WIP.
f6fd158d is described below
commit f6fd158d7562866968403725baa6f5f4f5841254
Author: Sergey Khisamov <sk...@fitechsource.com>
AuthorDate: Wed Dec 7 16:31:20 2022 +0400
WIP.
---
.../token/enricher/NCRuLemmaPosTokenEnricher.scala | 1 +
.../nlp/enrichers/NCEnBracketsTokenEnricher.scala | 8 ++++-
.../enrichers/NCEnDictionaryTokenEnricher.scala | 9 ++++-
.../nlp/enrichers/NCEnQuotesTokenEnricher.scala | 7 +++-
.../nlp/enrichers/NCEnStopWordsTokenEnricher.scala | 13 +++++--
.../enrichers/NCEnSwearWordsTokenEnricher.scala | 8 +++--
.../enrichers/NCOpenNLPLemmaPosTokenEnricher.scala | 17 ++++++---
.../nlpcraft/nlp/parsers/NCNLPEntityParser.scala | 14 +++++++-
.../nlp/parsers/NCOpenNLPEntityParser.scala | 17 +++++++--
.../nlp/parsers/NCOpenNLPTokenParser.scala | 8 +++--
.../nlpcraft/nlp/parsers/NCSemanticElement.scala | 23 ++++++++++++
.../nlp/parsers/NCSemanticEntityParser.scala | 41 ++++++++++++++--------
.../nlpcraft/nlp/parsers/NCSemanticStemmer.scala | 10 +++++-
13 files changed, 142 insertions(+), 34 deletions(-)
diff --git a/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuLemmaPosTokenEnricher.scala b/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuLemmaPosTokenEnricher.scala
index 6544ec91..2251a496 100644
--- a/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuLemmaPosTokenEnricher.scala
+++ b/nlpcraft-examples/lightswitch-ru/src/main/scala/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuLemmaPosTokenEnricher.scala
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*
import org.languagetool.AnalyzedToken
import org.languagetool.tagging.ru.RussianTagger
import scala.jdk.CollectionConverters.*
+
/**
*
*/
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
index 3faf33c5..aff54c0d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnBracketsTokenEnricher.scala
@@ -24,7 +24,13 @@ import java.io.*
import scala.collection.mutable
/**
- * TODO: enriches with <code>brackets</code> property.
+ * [[NCTokenEnricher]] built-in English language implementation.
+ *
+ * It adds <code>brackets</code> boolean property to [[NCToken]] instance if word which it represents is in brackets.
+ *
+ * Supported brackets are: <code>()</code>, <code>{}</code>, <code>[]</code> and <code><></code>.
+ *
+ * Note that invalid enclosed brackets are ignored.
*/
class NCEnBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
index 7360ac3e..5f250c0e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnDictionaryTokenEnricher.scala
@@ -21,7 +21,14 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.internal.util.NCUtils
/**
- * TODO: enriches with <code>dict</code> property.
+ * [[NCTokenEnricher]] built-in English language implementation.
+ *
+ * It adds <code>dict</code> boolean property to [[NCToken]] instance if word which it represents is
+ * valid English word. That means that English dictionary contains this word initial form.
+ * See [[https://en.wikipedia.org/wiki/Moby_Project Moby Project]] for more about the EN dictionary used here.
+ *
+ * Note that this implementation requires <code>lemma</code> string property in [[NCToken]] instance.
+ * You can configure [[NCOpenNLPLemmaPosTokenEnricher]] before [[NCEnDictionaryTokenEnricher]] in your [[NCPipeline]].
*/
class NCEnDictionaryTokenEnricher extends NCTokenEnricher:
private var dict: Set[String] = _
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
index 5c514a71..a3ecf4b2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnQuotesTokenEnricher.scala
@@ -21,7 +21,12 @@ import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.*
/**
- * TODO: enriches with <code>quoted</code> property.
+ * [[NCTokenEnricher]] built-in English language implementation.
+ *
+ * It adds <code>quoted</code> boolean property to [[NCToken]] instance if word which it represents is in quotes.
+ *
+ * Note that this implementation requires <code>pos</code> string property in [[NCToken]] instance.
+ * You can configure [[NCOpenNLPLemmaPosTokenEnricher]] before [[NCEnQuotesTokenEnricher]] in your [[NCPipeline]].
*/
class NCEnQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
private final val Q_POS: Set[String] = Set("``", "''")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index b3af6f00..03ddf6ee 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -29,7 +29,7 @@ import scala.collection.*
import scala.concurrent.ExecutionContext
/**
- * TODO: enriches with <code>stopword</code> property.
+ * [[NCEnStopWordsTokenEnricher]] helper.
*/
object NCEnStopWordsTokenEnricher:
// Condition types.
@@ -164,9 +164,16 @@ object NCEnStopWordsTokenEnricher:
import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher.*
/**
+ * [[NCTokenEnricher]] built-in English language implementation.
*
- * @param addStopsSet
- * @param exclStopsSet
+ * It adds <code>stopword</code> string property to [[NCToken]] instance if word which it represents is stop-word.
+ * Look more about stop-words [[https://en.wikipedia.org/wiki/Stop_word here]].
+ *
+ * Note that this implementation requires <code>pos</code> and <code>lemma</code> string properties in [[NCToken]] instance.
+ * You can configure [[NCOpenNLPLemmaPosTokenEnricher]] before [[NCEnStopWordsTokenEnricher]] in your [[NCPipeline]].
+ *
+ * @param addStopsSet User defined additional stop-words collection.
+ * @param exclStopsSet Collection of words which should not be marked as stop-words during component processing.
*/
class NCEnStopWordsTokenEnricher(addStopsSet: Set[String] = Set.empty, exclStopsSet: Set[String] = Set.empty) extends NCTokenEnricher with LazyLogging:
private final val stemmer = new PorterStemmer
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
index 404c996d..27ad4a0e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnSwearWordsTokenEnricher.scala
@@ -26,9 +26,13 @@ import java.io.*
import java.util.Objects
/**
- * TODO: enriches with <code>swear</code> property.
+ * [[NCTokenEnricher]] built-in English language implementation.
*
- * @param res
+ * It adds <code>swear</code> boolean property to [[NCToken]] instance if word which it represents is swear word.
+ *
+ * @param res Path to swear words list text resource.
+ * Note that [[NCPipelineBuilder.withSemantic()]] methods use
+ * [[https://raw.githubusercontent.com/apache/incubator-nlpcraft/external_config/external/badfilter/swear_words.txt NlpCraft Swearword Dictionary]]
*/
class NCEnSwearWordsTokenEnricher(res: String) extends NCTokenEnricher with LazyLogging:
require(res != null, "Swear words model file cannot be null.")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
index ddcc2c16..82780458 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPLemmaPosTokenEnricher.scala
@@ -28,11 +28,20 @@ import java.io.*
import scala.concurrent.ExecutionContext
/**
- * TODO: enriches with <code>lemma</code> and <code>pos</code> properties.
+ * [[NCTokenEnricher]] built-in language independent implementation based on
+ * [[https://opennlp.apache.org/ OpenNLP]] <code>lemma</code> and <code>POS tagger</code> models.
*
- * Models can be downloaded from the following resources:
- * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
- * - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
+ * It adds <code>lemma</code> and <code>pos</code> (part-of-speech) string properties to [[NCToken]] instance.
+ * Lemma is the canonical form of word, look [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] for more details.
+ * Part-of-speech tags are described [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
+ *
+ * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
+ *
+ * @param posMdlSrc Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
+ * Note that [[NCPipelineBuilder.withSemantic()]] methods use [[http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin en-pos-maxent.bin]].
+ *
+ * @param lemmaDicSrc Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html DictionaryLemmatizer]] model.
+ * Note that [[NCPipelineBuilder.withSemantic()]] methods use [[https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict en-lemmatizer.dict]].
*/
class NCOpenNLPLemmaPosTokenEnricher(posMdlSrc: String = null, lemmaDicSrc: String = null) extends NCTokenEnricher with LazyLogging:
private var tagger: POSTaggerME = _
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index c894c729..e78fb668 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -23,7 +23,7 @@ import java.util
import java.util.stream.Collectors
/**
- *
+ * [[NCNLPEntityParser]] helper.
*/
object NCNLPEntityParser:
private val id: String = "nlp:token"
@@ -31,7 +31,19 @@ object NCNLPEntityParser:
import org.apache.nlpcraft.nlp.parsers.NCNLPEntityParser.*
/**
+ * [[NCEntityParser]] built-in NLP implementation.
+ *
+ * It converts list of [[NCToken]] instances to list of [[NCEntity]] instances with ID <code>nlp:token</code>.
+ * Each [[NCEntity]] instance contains following mandatory properties:
+ * - nlp:token:text
+ * - nlp:token:index
+ * - nlp:token:startCharIndex
+ * - nlp:token:endCharIndex
+ *
+ * and all other properties which were already added by [[NCPipeline]] into processed [[NCToken]].
*
+ * @param predicate Predicate which allows restricting the list of converted [[NCToken]] instances.
+ * By default all [[NCToken]] instances are converted to [[NCEntity]] instances.
*/
class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends NCEntityParser:
override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
index e70724bb..2f2d0f48 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPEntityParser.scala
@@ -33,16 +33,29 @@ import scala.language.postfixOps
import scala.util.Using
/**
- *
+ * [[NCOpenNLPEntityParser]] helper.
*/
object NCOpenNLPEntityParser:
+ /**
+ * Creates [[NCOpenNLPEntityParser]] instance.
+ *
+ * @param src Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html model]].
+ * @return [[NCOpenNLPEntityParser]] instance.
+ */
def apply(src: String): NCOpenNLPEntityParser =
require(src != null, "Model source cannot be null.")
new NCOpenNLPEntityParser(List(src))
/**
+ * [[NCEntityParser]] built-in implementation based on [[https://opennlp.apache.org/ OpenNLP]] <code>name finders</code> models.
+ *
+ * It prepares [[NCEntity]] instances which are found by configured [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html models]]
+ * with entity ID <code>opennlp:modelId</code>, where <code>modelId</code> is ID of configured models.
+ *
+ * Note that each [[NCToken]] can be included into several [[NCEntity]] instances.
*
- * @param srcs
+ * @param srcs Paths to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/namefind/TokenNameFinderModel.html models]].
+ * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
*/
class NCOpenNLPEntityParser(srcs: List[String]) extends NCEntityParser with LazyLogging:
require(srcs != null, "Models source cannot be null.")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
index 80775925..cbdbbea9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCOpenNLPTokenParser.scala
@@ -27,10 +27,12 @@ import java.util
import java.util.Objects
/**
- * Models can be downloaded from the following resources:
- * - tokenizer: http://opennlp.sourceforge.net/models-1.5/en-token.bin
+ * [[NCTokenParser]] built-in language independent implementation based on [[https://opennlp.apache.org/ OpenNLP]] <code>tokenizers</code> models.
*
- * @param tokMdl
+ * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
+ *
+ * @param tokMdl Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/tokenize/TokenizerModel.html model]].
+ * Note that [[NCPipelineBuilder.withSemantic()]] methods use [[http://opennlp.sourceforge.net/models-1.5/en-token.bin models-1.5/en-token.bin]].
*/
class NCOpenNLPTokenParser(tokMdl: String) extends NCTokenParser with LazyLogging:
require(tokMdl != null, "Tokenizer model path cannot be null.")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
index 920cc53b..fb376d2f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticElement.scala
@@ -30,31 +30,54 @@ package org.apache.nlpcraft.nlp.parsers
ALl rights reserved.
*/
+// TODO: link on site?
/**
*
+ * Configuration which helps to detect [[org.apache.nlpcraft.NCEntity NCEntity]] for
+ * <code>Semantic</code> implementation of [[org.apache.nlpcraft.NCEntityParser NCEntityParser]].
+ *
+ * See detailed description [[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic Semantic Parser]].
+ *
+ * @see [[NCSemanticEntityParser]]
+ * @see [[NCSemanticStemmer]]
*/
trait NCSemanticElement:
/**
+ * Gets <code>id</code> for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
+ * Representation of [[org.apache.nlpcraft.NCEntity.getId NCEntity.getId()]] method.
*
+ * @return Element ID.
*/
def getId: String
/**
+ * Gets <code>groups</code> for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
+ * Representation of [[org.apache.nlpcraft.NCEntity.getGroups NCEntity.getGroups()]] method.
*
+ * @return Groups.
*/
def getGroups: Set[String] = Set(getId)
/**
+ * Gets values map. Each element can contain multiple values,
+ * each value is described as name and list of its synonyms.
+ * They allow finding the element's value in text.
*
+ * @return Values.
*/
def getValues: Map[String, Set[String]] = Map.empty
/**
+ * Gets element's synonyms list. They allow finding the element in text.
*
+ * @return Synonyms.
*/
def getSynonyms: Set[String] = Set.empty
/**
+ * Gets optional <code>properties</code> map for created [[org.apache.nlpcraft.NCEntity NCEntity]] instance.
+ * Representation of [[org.apache.nlpcraft.NCEntity NCEntity]] content.
*
+ * @return Properties.
*/
def getProperties: Map[String, AnyRef] = Map.empty
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
index f978fd1f..6dddf61d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala
@@ -32,15 +32,16 @@ import scala.annotation.tailrec
import scala.collection.mutable
/**
- *
+ * [[NCSemanticEntityParser]] helper.
*/
object NCSemanticEntityParser:
/**
+ * Creates [[NCSemanticEntityParser]] instance.
*
- * @param stemmer
- * @param parser
- * @param macros
- * @param elements
+ * @param stemmer [[NCSemanticStemmer]] implementation.
+ * @param parser [[NCTokenParser]] implementation.
+ * @param macros Macros map. Empty by default.
+ * @param elements [[NCSemanticElement]] list.
*/
def apply(
stemmer: NCSemanticStemmer,
@@ -57,9 +58,11 @@ object NCSemanticEntityParser:
/**
*
- * @param stemmer
- * @param parser
- * @param elements
+ * Creates [[NCSemanticEntityParser]] instance.
+ *
+ * @param stemmer [[NCSemanticStemmer]] implementation.
+ * @param parser [[NCTokenParser]] implementation.
+ * @param elements [[NCSemanticElement]] list.
*/
def apply(
stemmer: NCSemanticStemmer,
@@ -74,9 +77,11 @@ object NCSemanticEntityParser:
/**
*
- * @param stemmer
- * @param parser
- * @param mdlSrc
+ * Creates [[NCSemanticEntityParser]] instance.
+ *
+ * @param stemmer [[NCSemanticStemmer]] implementation.
+ * @param parser [[NCTokenParser]] implementation.
+ * @param mdlSrc Classpath resource, file path or URL for YAML or JSON semantic model definition file.
*/
def apply(stemmer: NCSemanticStemmer, parser: NCTokenParser, mdlSrc: String): NCSemanticEntityParser =
require(stemmer != null, "Stemmer cannot be null.")
@@ -172,11 +177,17 @@ object NCSemanticEntityParser:
import org.apache.nlpcraft.nlp.parsers.NCSemanticEntityParser.*
/**
+ * [[NCEntityParser]] built-in <code>semantic</code> implementation.
+ *
+ * See detailed description [[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic Semantic Parser]].
+ *
+ * @see [[NCSemanticElement]]
+ * @see [[NCSemanticStemmer]]
*
- * @param stemmer
- * @param parser
- * @param macros
- * @param elements
+ * @param stemmer [[NCSemanticStemmer]] implementation.
+ * @param parser [[NCTokenParser]] implementation.
+ * @param macros Macros map. Empty by default.
+ * @param elements [[NCSemanticElement]] list.
* @param mdlSrcOpt Optional classpath resource, file path or URL for YAML or JSON semantic model definition file.
*/
class NCSemanticEntityParser(
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
index 84a6265f..c09d305b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticStemmer.scala
@@ -30,12 +30,20 @@ package org.apache.nlpcraft.nlp.parsers
ALl rights reserved.
*/
+// TODO: link on site?
/**
*
+ * Stemmer trait. Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
+ *
+ * See detailed description [[https://nlpcraft.apache.org/built-in-entity-parser.html#parser-semantic Semantic Parser]].
+ *
+ * @see [[NCSemanticEntityParser]]
+ * @see [[NCSemanticElement]]
*/
trait NCSemanticStemmer:
/**
+ * Gets text's stem.
*
- * @param txt
+ * @param txt Text to stem.
*/
def stem(txt: String): String