You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/14 00:19:14 UTC

[incubator-nlpcraft] branch NLPCRAFT-520 updated: Update NCOpenNLPTokenEnricher.scala

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 4c27312e Update NCOpenNLPTokenEnricher.scala
4c27312e is described below

commit 4c27312eb9e9de237c238e1cfb89c6dd0d0422ea
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Tue Dec 13 16:18:39 2022 -0800

    Update NCOpenNLPTokenEnricher.scala
---
 .../nlp/enrichers/NCOpenNLPTokenEnricher.scala      | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index 270ca60a..f8451775 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -27,19 +27,18 @@ import java.io.*
 import scala.concurrent.ExecutionContext
 
 /**
-  * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]].
+  * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]]. This
+  * enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] properties to the [[NCToken token]]
+  * instance. Learn more about lemmas [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about part-of-speech tags
+  * [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
   *
-  * This enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] property to the [[NCToken token]]
-  * instance.
-
-  * Lemma is the canonical form of word, look [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] for more details.
-  *
-  * Part-of-speech tags are described [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
-  *
-  * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
+  * This OpenNLP enricher requires PoS and lemma models. Some of the OpenNLP community models can be found
+  * [[https://opennlp.sourceforge.net/models-1.5/ here]].
   *
-  * @param posMdlRes Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
-  * @param lemmaDicRes Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html DictionaryLemmatizer]] model.
+  * @param posMdlRes Relative path, absolute path or URL to
+  *         [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
+  * @param lemmaDicRes Relative path, absolute path or URL to
+  *         [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html DictionaryLemmatizer]] model.
   */
 class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = null) extends NCTokenEnricher with LazyLogging:
     private var tagger: POSTaggerME = _