You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/14 00:19:14 UTC
[incubator-nlpcraft] branch NLPCRAFT-520 updated: Update NCOpenNLPTokenEnricher.scala
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
new 4c27312e Update NCOpenNLPTokenEnricher.scala
4c27312e is described below
commit 4c27312eb9e9de237c238e1cfb89c6dd0d0422ea
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Tue Dec 13 16:18:39 2022 -0800
Update NCOpenNLPTokenEnricher.scala
---
.../nlp/enrichers/NCOpenNLPTokenEnricher.scala | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
index 270ca60a..f8451775 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala
@@ -27,19 +27,18 @@ import java.io.*
import scala.concurrent.ExecutionContext
/**
- * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]].
+ * [[https://opennlp.apache.org/ OpenNLP]] based language independent [[NCTokenEnricher token enricher]]. This
+ * enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] properties to the [[NCToken token]]
+ * instance. Learn more about lemmas [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] and about part-of-speech tags
+ * [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
*
- * This enricher adds `lemma` and `pos` (part-of-speech) string [[NCPropertyMap metadata]] property to the [[NCToken token]]
- * instance.
-
- * Lemma is the canonical form of word, look [[https://en.wikipedia.org/wiki/Lemma_(morphology) here]] for more details.
- *
- * Part-of-speech tags are described [[https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html here]].
- *
- * Some of OpenNLP prepared models can be found [[https://opennlp.sourceforge.net/models-1.5/ here]].
+ * This OpenNLP enricher requires PoS and lemma models. Some of the OpenNLP community models can be found
+ * [[https://opennlp.sourceforge.net/models-1.5/ here]].
*
- * @param posMdlRes Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
- * @param lemmaDicRes Path to [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html DictionaryLemmatizer]] model.
+ * @param posMdlRes Relative path, absolute path or URL to
+ * [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/postag/POSTaggerME.html POSTaggerME]] model.
+ * @param lemmaDicRes Relative path, absolute path or URL to
+ * [[https://opennlp.apache.org/docs/2.0.0/apidocs/opennlp-tools/opennlp/tools/lemmatizer/DictionaryLemmatizer.html DictionaryLemmatizer]] model.
*/
class NCOpenNLPTokenEnricher(posMdlRes: String = null, lemmaDicRes: String = null) extends NCTokenEnricher with LazyLogging:
private var tagger: POSTaggerME = _