You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/19 23:56:11 UTC
[incubator-nlpcraft] branch master updated: Fix for NLPCRAFT-370.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 638aad2 Fix for NLPCRAFT-370.
638aad2 is described below
commit 638aad2dc3d20b37f47b61a84511e0dbbdc82c75
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Mon Jul 19 16:56:00 2021 -0700
Fix for NLPCRAFT-370.
---
.../scala/org/apache/nlpcraft/model/NCToken.java | 7 +++---
.../scala/org/apache/nlpcraft/model/NCVariant.java | 27 ++++++++++++++++++++++
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index 78e6266..f6d8937 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -433,8 +433,8 @@ public interface NCToken extends NCMetadata {
default String getStem() { return meta("nlpcraft:nlp:stem"); }
/**
- * A shortcut method to get numeric value of how sparse the token is. Sparsity zero means that all
- * individual words in the token follow each other (regardless of the order).
+ * A shortcut method to get numeric value of how sparse the token is. This makes sense only for multi-word tokens.
+ * Sparsity zero means that all individual words in the token follow each other (regardless of the order).
* <p>
* This method is equivalent to:
* <pre class="brush: java">
@@ -442,7 +442,8 @@ public interface NCToken extends NCMetadata {
* </pre>
* See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
*
- * @return Numeric value of how sparse the token is.
+ * @return Numeric value of how sparse the token is. Zero means no gaps between words. The bigger the sparsity
+ * value, the bigger the average gap between words.
*/
default int getSparsity() { return meta("nlpcraft:nlp:sparsity"); }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
index 7aedc83..05fe606 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
@@ -37,8 +37,35 @@ public interface NCVariant extends List<NCToken>, NCMetadata {
* </pre>
*
* @return All non-freeword tokens.
+ * @see NCToken#isFreeWord()
*/
default List<NCToken> getMatchedTokens() {
return stream().filter(tok -> !tok.isFreeWord()).collect(Collectors.toList());
}
+
+ /**
+ * Utility method that returns all freeword tokens. It's equivalent to:
+ * <pre class="brush: java">
+ * return stream().filter(NCToken::isFreeWord).collect(Collectors.toList());
+ * </pre>
+ *
+ * @return All freeword tokens.
+ * @see NCToken#isFreeWord()
+ */
+ default List<NCToken> getFreeTokens() {
+ return stream().filter(NCToken::isFreeWord).collect(Collectors.toList());
+ }
+
+ /**
+ * Utility method that returns all user-defined tokens. It's equivalent to:
+ * <pre class="brush: java">
+ * return stream().filter(NCToken::isUserDefined).collect(Collectors.toList());
+ * </pre>
+ *
+ * @return All user-defined tokens.
+ * @see NCToken#isUserDefined()
+ */
+ default List<NCToken> getUserDefinedTokens() {
+ return stream().filter(NCToken::isUserDefined).collect(Collectors.toList());
+ }
}