You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/27 19:07:18 UTC

[incubator-nlpcraft] branch NLPCRAFT-520 updated: Scaladoc.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 2fa9bbb4 Scaladoc.
2fa9bbb4 is described below

commit 2fa9bbb466a1daca7834480433c85abd3d21735e
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Tue Dec 27 11:07:11 2022 -0800

    Scaladoc.
---
 .../nlp/enrichers/NCOpenNLPTokenEnricher.scala     | 12 +++++-----
 .../nlp/enrichers/NCSwearWordsTokenEnricher.scala  |  3 ++-
 .../nlpcraft/nlp/parsers/NCNLPEntityParser.scala   | 26 ++++++++++++----------
 3 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index f5aa2f29..b8f72182 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -32,7 +32,7 @@ import scala.concurrent.ExecutionContext
   * instance. Learn more about lemmas [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about part-of-speech
   * [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
   *
-  * This OpenNLP enricher requires PoS and lemma models. Some of free OpenNLP community maintained models can be found
+  * This OpenNLP enricher requires PoS and lemma models. Some of the free OpenNLP community-maintained models can be found
   * [[https://opennlp.sourceforge.net/models-1.5/ here]]. Note that at least one of model must be defined.
   *
   * @param posMdlRes Relative path, absolute path, classpath resource or URL to
@@ -58,14 +58,16 @@ class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = nul
                 () => {
                     if posMdlRes != null then
                         tagger = new POSTaggerME(new POSModel(NCUtils.getStream(posMdlRes)))
-                        logger.trace(s"Loaded resource: $posMdlRes")
-                    else logger.warn("POS tagger is not configured.")
+                        logger.trace(s"Loaded OpenNLP POS tagging model: $posMdlRes")
+                    else
+                        logger.warn("OpenNLP POS tagger is not configured.")
                 },
                 () => {
                     if lemmaDicRes != null then
                         lemmatizer = new DictionaryLemmatizer(NCUtils.getStream(lemmaDicRes))
-                        logger.trace(s"Loaded resource: $lemmaDicRes")
-                    else logger.warn("Lemmatizer is not configured.")
+                        logger.trace(s"Loaded OpenNLP lemmatization model: $lemmaDicRes")
+                    else
+                        logger.warn("OpenNLP lemmatizer is not configured.")
                 }
             )
         )(ExecutionContext.Implicits.global)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index 86afb5c7..6b16fdce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -38,10 +38,11 @@ import java.util.Objects
   *         duplicates ignored, lines starting with **#** symbol will be treated as comments and ignored.
   *         Note that the search in the dictionary is implemented using words' **stem** and case is ignored.
   * @param stemmer Stemmer implementation for the language used in the supplied swear-word dictionary.
+  * @see [[org.apache.nlpcraft.nlp.stemmer.NCEnStemmer]]
   */
 //noinspection ScalaWeakerAccess
 class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends NCTokenEnricher with LazyLogging:
-    require(dictRes != null, "Swear words dictonary resource cannot be null.")
+    require(dictRes != null, "Swear words dictionary resource cannot be null.")
     require(stemmer != null, "Stemmer cannot be null.")
 
     private var swearWords: Set[String] = _
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index 9d10682a..170af870 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -31,21 +31,23 @@ private object NCNLPEntityParser:
 import NCNLPEntityParser.*
 
 /**
-  * NLP data [[NCEntityParser entity parser]].
+  * Common NLP data [[NCEntityParser entity parser]].
   *
-  * This parser converts list of input [[NCToken]] instances one-to-one to list of [[NCEntity]] instances with type **nlp:entity**.
-  * All [[NCEntity]] instances contain following mandatory [[NCPropertyMap metadata]] properties:
-  *  - nlp:entity:text
-  *  - nlp:entity:index
-  *  - nlp:entity:startCharIndex
-  *  - nlp:entity:endCharIndex
+  * This parser converts a list of input [[NCToken]] instances **one-to-one** to a list of [[NCEntity]] instances with
+  * type **nlp:entity**. All [[NCEntity]] instances in the result list will contain the following
+  * [[NCPropertyMap metadata]] properties:
+  *  - `nlp:entity:text` - token's text.
+  *  - `nlp:entity:index` - token's index in the input sentence.
+  *  - `nlp:entity:startCharIndex` - token text's first character index in the input sentence.
+  *  - `nlp:entity:endCharIndex` - token text's last character index in the input sentence.
   *
-  *  Created [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap metadata]] properties,
-  *  with new names prefixed by **nlp:entity:**.
-  *  For example for property **prop** new name will be **nlp:entity:prop**.
+  *  Note that [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap metadata]] properties from their
+  *  corresponding tokens with new names that are prefixed with **'nlp:entity:'**. For example, for token property **prop**
+  *  the corresponding inherited entity property name will be **nlp:entity:prop**.
   *
-  *  @param predicate Predicate which allows to filter list of converted [[NCToken]] instances.
-  *  By default all [[NCToken]] instances converted.
+  *  @param predicate Predicate used to filter the list of [[NCToken]] instances to convert. Only tokens that
+  *     satisfy the given predicate will be converted to entities by this parser. By default, all [[NCToken]] instances are
+  *     converted.
   */
 class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends NCEntityParser:
     require(predicate != null, "Predicate cannot be null.")