You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/27 19:07:18 UTC
[incubator-nlpcraft] branch NLPCRAFT-520 updated: Scaladoc.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 2fa9bbb4 Scaladoc.
2fa9bbb4 is described below
commit 2fa9bbb466a1daca7834480433c85abd3d21735e
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Tue Dec 27 11:07:11 2022 -0800
Scaladoc.
---
.../nlp/enrichers/NCOpenNLPTokenEnricher.scala | 12 +++++-----
.../nlp/enrichers/NCSwearWordsTokenEnricher.scala | 3 ++-
.../nlpcraft/nlp/parsers/NCNLPEntityParser.scala | 26 ++++++++++++----------
3 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index f5aa2f29..b8f72182 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -32,7 +32,7 @@ import scala.concurrent.ExecutionContext
* instance. Learn more about lemmas [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about part-of-speech
* [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
*
- * This OpenNLP enricher requires PoS and lemma models. Some of free OpenNLP community maintained models can be found
+ * This OpenNLP enricher requires PoS and lemma models. Some of the free OpenNLP community-maintained models can be found
* [[https://opennlp.sourceforge.net/models-1.5/ here]]. Note that at least one of model must be defined.
*
* @param posMdlRes Relative path, absolute path, classpath resource or URL to
@@ -58,14 +58,16 @@ class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = nul
() => {
if posMdlRes != null then
tagger = new POSTaggerME(new POSModel(NCUtils.getStream(posMdlRes)))
- logger.trace(s"Loaded resource: $posMdlRes")
- else logger.warn("POS tagger is not configured.")
+ logger.trace(s"Loaded OpenNLP POS tagging model: $posMdlRes")
+ else
+ logger.warn("OpenNLP POS tagger is not configured.")
},
() => {
if lemmaDicRes != null then
lemmatizer = new DictionaryLemmatizer(NCUtils.getStream(lemmaDicRes))
- logger.trace(s"Loaded resource: $lemmaDicRes")
- else logger.warn("Lemmatizer is not configured.")
+ logger.trace(s"Loaded OpenNLP lemmatization model: $lemmaDicRes")
+ else
+ logger.warn("OpenNLP lemmatizer is not configured.")
}
)
)(ExecutionContext.Implicits.global)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
index 86afb5c7..6b16fdce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCSwearWordsTokenEnricher.scala
@@ -38,10 +38,11 @@ import java.util.Objects
* duplicates ignored, lines starting with **#** symbol will be treated as comments and ignored.
* Note that the search in the dictionary is implemented using words' **stem** and case is ignored.
* @param stemmer Stemmer implementation for the language used in the supplied swear-word dictionary.
+ * @see [[org.apache.nlpcraft.nlp.stemmer.NCEnStemmer]]
*/
//noinspection ScalaWeakerAccess
class NCSwearWordsTokenEnricher(dictRes: String, stemmer: NCStemmer) extends NCTokenEnricher with LazyLogging:
- require(dictRes != null, "Swear words dictonary resource cannot be null.")
+ require(dictRes != null, "Swear words dictionary resource cannot be null.")
require(stemmer != null, "Stemmer cannot be null.")
private var swearWords: Set[String] = _
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
index 9d10682a..170af870 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCNLPEntityParser.scala
@@ -31,21 +31,23 @@ private object NCNLPEntityParser:
import NCNLPEntityParser.*
/**
- * NLP data [[NCEntityParser entity parser]].
+ * Common NLP data [[NCEntityParser entity parser]].
*
- * This parser converts list of input [[NCToken]] instances one-to-one to list of [[NCEntity]] instances with type **nlp:entity**.
- * All [[NCEntity]] instances contain following mandatory [[NCPropertyMap metadata]] properties:
- * - nlp:entity:text
- * - nlp:entity:index
- * - nlp:entity:startCharIndex
- * - nlp:entity:endCharIndex
+ * This parser converts the list of input [[NCToken]] instances **one-to-one** to the list of [[NCEntity]] instances with
+ * type **nlp:entity**. All [[NCEntity]] instances in the result list will contain the following
+ * [[NCPropertyMap metadata]] properties:
+ * - `nlp:entity:text` - token's text.
+ * - `nlp:entity:index` - token's index in the input sentence.
+ * - `nlp:entity:startCharIndex` - token text's first character index in the input sentence.
+ * - `nlp:entity:endCharIndex` - token text's last character index in the input sentence.
*
- * Created [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap metadata]] properties,
- * with new names prefixed by **nlp:entity:**.
- * For example for property **prop** new name will be **nlp:entity:prop**.
+ * Note that [[NCEntity]] instances inherit all [[NCToken]] [[NCPropertyMap metadata]] properties from their
+ * corresponding tokens, with new names prefixed with **'nlp:entity:'**. For example, for token property **prop**
+ * the corresponding inherited entity property name will be **nlp:entity:prop**.
*
- * @param predicate Predicate which allows to filter list of converted [[NCToken]] instances.
- * By default all [[NCToken]] instances converted.
+ * @param predicate Predicate used to filter the list of converted [[NCToken]] instances. Only tokens that
+ * satisfy the given predicate will be converted to entities by this parser. By default all [[NCToken]] instances are
+ * converted.
*/
class NCNLPEntityParser(predicate: NCToken => Boolean = _ => true) extends NCEntityParser:
require(predicate != null, "Predicate cannot be null.")