You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/21 17:42:40 UTC

[incubator-nlpcraft] branch NLPCRAFT-520 updated: Scaladoc fixes.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new f5ee0f27 Scaladoc fixes.
f5ee0f27 is described below

commit f5ee0f27fe264c2151a879afe1f4dfc9f9b95892
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Wed Dec 21 09:42:34 2022 -0800

    Scaladoc fixes.
---
 .../org/apache/nlpcraft/internal/util/NCUtils.scala   |  2 +-
 .../nlp/enrichers/NCBracketsTokenEnricher.scala       |  6 ++----
 .../nlp/enrichers/NCDictionaryTokenEnricher.scala     |  9 +++++----
 .../nlp/enrichers/NCEnStopWordsTokenEnricher.scala    |  5 +----
 .../nlp/enrichers/NCOpenNLPTokenEnricher.scala        |  8 +++-----
 .../nlp/enrichers/NCQuotesTokenEnricher.scala         |  5 ++---
 .../nlp/enrichers/NCSwearWordsTokenEnricher.scala     | 19 +++++++------------
 7 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index 1b81acd0..f9ad8792 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -392,7 +392,7 @@ object NCUtils extends LazyLogging:
         data
 
     /**
-      *  Reads lines from given resource.
+      * Reads lines from given resource.
       *
       * @param res Resource, file absolute or relative path or input stream.
       * @param enc Encoding. Default value is "UTF-8".
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
index 70ce779a..9b0fc863 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
@@ -26,7 +26,7 @@ import scala.collection.{Map, mutable}
 /**
   * Companion helper.
   */
-object NCBracketsTokenEnricher:
+private object NCBracketsTokenEnricher:
     private val BRACKETS = Map("(" -> ")", "{" -> "}", "[" -> "]", "<" -> ">")
     private val BRACKETS_REVERSED = BRACKETS.map { case (key, value) => value -> key }
 
@@ -36,9 +36,7 @@ import NCBracketsTokenEnricher.*
   * Brackets [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `brackets` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
-  * instance if the word it represents is enclosed in brackets.
-  *
-  * Supported brackets are: `()`, `{}`, `[]` and `<>`.
+  * instance if the word it represents is enclosed in brackets. Supported brackets are: `()`, `{}`, `[]` and `<>`.
   *
   * **NOTE:** invalid enclosed brackets are ignored and for all input tokens property `brackets` assigned as `false`.
   */
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
index 0d28f3ad..490a59bb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCDictionaryTokenEnricher.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.NCUtils as U
 
 /**
-  * "Known-word" [[NCTokenEnricher token enricher]].
+  * Dictionary-based "known-word" [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `dict` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
   * instance if word it represents is a known dictionary word, i.e. the configured dictionary contains this word's
@@ -34,12 +34,13 @@ import org.apache.nlpcraft.internal.util.NCUtils as U
   * metadata property before this enricher in your [[NCPipeline pipeline]].
   *
   * @param dictRes Relative path, absolute path, classpath resource or URL to the dictionary.
-  *         The dictionary should have a simple plain text format with *one lemma per line*, empty lines are skipped, duplicates ignored, header or other comments allowed.
-  *         Headers are lines started with **#** symbol. Search in the dictionary is implemented by input words **lemms**, case is ignored.
+  *         The dictionary should have a simple plain text format with *one lemma per line*, empty lines are skipped,
+  *         duplicates ignored, lines starting with **#** symbol will be treated as comments and ignored.
+  *         Note that the search in the dictionary is implemented using words' **lemma** and case is ignored.
   */
 //noinspection DuplicatedCode,ScalaWeakerAccess
 class NCDictionaryTokenEnricher(dictRes: String) extends NCTokenEnricher with LazyLogging:
-    require(dictRes != null, "Dictonary resource cannot be null.")
+    require(dictRes != null, "Dictionary resource cannot be null.")
 
     private var dict: Set[String] = _
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
index 3eaa7f6e..698c43f0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala
@@ -172,15 +172,12 @@ import org.apache.nlpcraft.nlp.enrichers.NCEnStopWordsTokenEnricher.*
   * instance if the word it represents is an English stopword. The value `true` of this metadata property indicates that
   * this word is detected as a stopword, `false` value indicates otherwise. This implementation works off the
   * algorithm that uses an internal list of English stopwords as well as a procedural logic to determine the stopword
-  * status of the token. This algorithm should work fine for most of the general uses cases. User, however, can add
+  * status of the token. This algorithm should work fine for most of the general use cases. User can also add
   * additional stopwords or exceptions for the existing ones using corresponding parameters in [[NCEnStopWordsTokenEnricher]]
   * constructor.
   *
   * More information about stopwords can be found at [[https://en.wikipedia.org/wiki/Stop_word]].
   *
-  * `stemmer` implementation language should be corresponded to other components of [[NCPipeline]], but
-  * required `stemmer` implementation is independent from other components' stemmers.
-  *
   * **NOTE:** this implementation requires `lemma` and `pos` string [[NCPropertyMap metadata]] properties that
   * contain token's lemma and part of speech accordingly. You can configure [[NCOpenNLPTokenEnricher]] with the model
   * for English language that would provide these metadata properties before this enricher in your [[NCPipeline pipeline]].
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index a51284da..f5aa2f29 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -27,15 +27,13 @@ import java.io.*
 import scala.concurrent.ExecutionContext
 
 /**
-  * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]]. This
+  * [[https://opennlp.apache.org/ OpenNLP]]-based language independent [[NCTokenEnricher token enricher]]. This
   * enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] property to the [[NCToken token]]
   * instance. Learn more about lemmas [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about part-of-speech
   * [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
   *
-  * At least one of model must be defined.
-  *
-  * This OpenNLP enricher requires PoS and lemma models. Some of OpenNLP community models can be found
-  * [[https://opennlp.sourceforge.net/models-1.5/ here]].
+  * This OpenNLP enricher requires PoS and lemma models. Some free, community-maintained OpenNLP models can be found
+  * [[https://opennlp.sourceforge.net/models-1.5/ here]]. Note that at least one of the models must be defined.
   *
   * @param posMdlRes Relative path, absolute path, classpath resource or URL to
   *         [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
index f2abb1c8..e55be2fb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
@@ -24,12 +24,11 @@ import scala.collection.*
 /**
   * Companion helper.
   */
-object NCQuotesTokenEnricher:
+private object NCQuotesTokenEnricher:
     private case class Range(from: Int, to: Int):
         def in(idx: Int): Boolean = idx >= from && idx <= to
 
     private val QUOTES = Map("«" -> "»", "\"" -> "\"", "`" -> "`", "'" -> "'")
-
     private val QUOTES_REVERSED = QUOTES.map { case (key, value) => value -> key }
     private val QUOTES_SYMBOLS = QUOTES.flatMap { case (key, value) => Set(key, value) }.toSet
 
@@ -46,7 +45,7 @@ import NCQuotesTokenEnricher.*
   *
   * Supported quotes are: **«**, **»**, **"**, **'**, **&#96;**.
   *
-  * **NOTE:** invalid enclosed quotes are ignored and for all input tokens property `quoted` assigned as `false`.
+  * **NOTE:** invalid enclosed quotes are ignored.
   */
 //noinspection ScalaWeakerAccess
 class NCQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index ff0ebc98..86afb5c7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -26,23 +26,18 @@ import java.io.*
 import java.util.Objects
 
 /**
-  * "Swear-word" [[NCTokenEnricher token enricher]].
+  * Swear-word [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `swear` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
-  * instance if word it represents is a swear word dictionary, i.e. the swear dictionary contains this word's
+  * instance if the word it represents is in a swear word dictionary, i.e. the swear dictionary contains this word's
   * stem. The value `true` of the metadata property indicates that this word's stem is found in the dictionary,
   * `false` value indicates otherwise.
   *
-  * Read more about stemming [[https://en.wikipedia.org/wiki/Stemming here]].
-  * Dictionary language and `stemmer` implementation language should be corresponded to other components of [[NCPipeline]], but
-  * required `stemmer` implementation is independent from other components' stemmers.
-  *
-  * Stemming is used here because it is too difficult to be based on more accurate `lemma` approach for swear words.
-  *
-  * @param dictRes Relative path, absolute path, classpath resource or URL to the swear dictionary. The dictionary should have a simple
-  *         plain text format with *one word per line*, empty lines are skipped, duplicates ignored, header or other comments allowed.
-  *         Headers are lines started with **#** symbol. Search in the dictionary is implemented by input words **stems**, case is ignored.
-  * @param stemmer Stemmer implementation for the dictionary language.
+  * @param dictRes Relative path, absolute path, classpath resource or URL to the dictionary.
+  *         The dictionary should have a simple plain text format with *one word per line*, empty lines are skipped,
+  *         duplicates ignored, lines starting with **#** symbol will be treated as comments and ignored.
+  *         Note that the search in the dictionary is implemented using words' **stem** and case is ignored.
+  * @param stemmer Stemmer implementation for the language used in the supplied swear-word dictionary.
   */
 //noinspection ScalaWeakerAccess
 class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends NCTokenEnricher with LazyLogging: