You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/19 23:56:11 UTC
[incubator-nlpcraft] branch master updated: Fix for NLPCRAFT-370.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 638aad2  Fix for NLPCRAFT-370.
638aad2 is described below

commit 638aad2dc3d20b37f47b61a84511e0dbbdc82c75
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Mon Jul 19 16:56:00 2021 -0700

    Fix for NLPCRAFT-370.
---
 .../scala/org/apache/nlpcraft/model/NCToken.java   |  7 +++---
 .../scala/org/apache/nlpcraft/model/NCVariant.java | 27 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index 78e6266..f6d8937 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -433,8 +433,8 @@ public interface NCToken extends NCMetadata {
     default String getStem() { return meta("nlpcraft:nlp:stem"); }
 
     /**
-     * A shortcut method to get numeric value of how sparse the token is. Sparsity zero means that all
-     * individual words in the token follow each other (regardless of the order).
+     * A shortcut method to get numeric value of how sparse the token is. This makes sense only for multi-word tokens.
+     * Sparsity zero means that all individual words in the token follow each other (regardless of the order).
      * <p>
      * This method is equivalent to:
      * <pre class="brush: java">
@@ -442,7 +442,8 @@ public interface NCToken extends NCMetadata {
      * </pre>
      * See more information on token metadata <a target=_ href="https://nlpcraft.apache.org/data-model.html#meta">here</a>.
      *
-     * @return Numeric value of how sparse the token is.
+     * @return Numeric value of how sparse the token is. Zero means no gaps between words. The bigger the sparsity
+     *      value, the bigger the average gap between words.
      */
     default int getSparsity() { return meta("nlpcraft:nlp:sparsity"); }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
index 7aedc83..05fe606 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCVariant.java
@@ -37,8 +37,35 @@ public interface NCVariant extends List<NCToken>, NCMetadata {
      * </pre>
      *
      * @return All non-freeword tokens.
+     * @see NCToken#isFreeWord()
      */
     default List<NCToken> getMatchedTokens() {
         return stream().filter(tok -> !tok.isFreeWord()).collect(Collectors.toList());
     }
+
+    /**
+     * Utility method that returns all freeword tokens. It's equivalent to:
+     * <pre class="brush: java">
+     *     return stream().filter(NCToken::isFreeWord).collect(Collectors.toList());
+     * </pre>
+     *
+     * @return All freeword tokens.
+     * @see NCToken#isFreeWord()
+     */
+    default List<NCToken> getFreeTokens() {
+        return stream().filter(NCToken::isFreeWord).collect(Collectors.toList());
+    }
+
+    /**
+     * Utility method that returns all user-defined tokens. It's equivalent to:
+     * <pre class="brush: java">
+     *     return stream().filter(NCToken::isUserDefined).collect(Collectors.toList());
+     * </pre>
+     *
+     * @return All user-defined tokens.
+     * @see NCToken#isUserDefined()
+     */
+    default List<NCToken> getUserDefinedTokens() {
+        return stream().filter(NCToken::isUserDefined).collect(Collectors.toList());
+    }
 }