You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/02/16 18:17:58 UTC

[incubator-nlpcraft] branch NLPCRAFT-236 updated: Code review + javadoc update.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-236
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-236 by this push:
     new b7802dd  Code review + javadoc update.
b7802dd is described below

commit b7802ddbaeaa47f495c43d9954459e2e2b0c763c
Author: Aaron Radzinski <ar...@apache.org>
AuthorDate: Tue Feb 16 10:17:49 2021 -0800

    Code review + javadoc update.
---
 .../main/scala/org/apache/nlpcraft/model/NCToken.java  | 18 +++++++++++++++---
 .../org/apache/nlpcraft/model/impl/NCTokenImpl.scala   | 10 +++++-----
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index fe7a442..c429f59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -309,12 +309,24 @@ public interface NCToken extends NCMetadata {
         String id = getId();
         int i = id.indexOf(':');
 
-        return i <=0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
+        return i <= 0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
     }
 
     /**
-     * TODO:
-     * @return
+     * Whether or not this token is abstract.
+     * <p>
+     * An abstract token is only detected when it is either a constituent part of some other non-abstract token
+     * or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
+     * unreferenced position. By default (unless this method returns {@code true}), all named entities are considered to be
+     * non-abstract.
+     * <p>
+     * Declaring tokens as abstract is important to minimize the number of parsing variants automatically
+     * generated as permutations of all possible parsing compositions. For example, if it is known that a particular
+     * named entity will only be used as a constituent part of some other token - declaring such a named entity as
+     * abstract can significantly reduce the number of parsing variants, leading to better performance
+     * and often a simpler corresponding intent definition and callback logic.
+     *
+     * @return Whether or not this token is abstract.
      */
     boolean isAbstract();
 }
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 002e0e0..2a3ea31 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -36,7 +36,7 @@ import scala.collection.{Seq, mutable}
   * @param parentId
   * @param value
   * @param meta
-  * @param isAbstractProp
+  * @param isAbstr
   */
 private[nlpcraft] class NCTokenImpl(
     mdl: NCModelView,
@@ -49,7 +49,7 @@ private[nlpcraft] class NCTokenImpl(
     startCharIndex: Int,
     endCharIndex: Int,
     meta: Map[String, Object],
-    isAbstractProp: Boolean
+    isAbstr: Boolean
 ) extends NCToken with Serializable {
     require(mdl != null)
     require(srvReqId != null)
@@ -74,8 +74,8 @@ private[nlpcraft] class NCTokenImpl(
     override lazy val getStartCharIndex: Int = startCharIndex
     override lazy val getEndCharIndex: Int = endCharIndex
     override lazy val getAliases: java.util.List[String] = meta(TOK_META_ALIASES_KEY, Collections.emptyList())
-    override lazy val isAbstract: Boolean = isAbstractProp
     override def getPartTokens: java.util.List[NCToken] = parts.asJava
+    override def isAbstract: Boolean = isAbstr
 
     def setParts(parts: Seq[NCToken]): Unit = this.parts = parts
 
@@ -155,7 +155,7 @@ private[nlpcraft] object NCTokenImpl {
                     startCharIndex = tok.startCharIndex,
                     endCharIndex = tok.endCharIndex,
                     meta = convertMeta(),
-                    isAbstractProp = mdl.model.getAbstractTokens.contains(elm.getId)
+                    isAbstr = mdl.model.getAbstractTokens.contains(elm.getId)
                 )
 
             case None ⇒
@@ -179,7 +179,7 @@ private[nlpcraft] object NCTokenImpl {
                     startCharIndex = tok.startCharIndex,
                     endCharIndex = tok.endCharIndex,
                     meta = convertMeta(),
-                    isAbstractProp = mdl.model.getAbstractTokens.contains(note.noteType)
+                    isAbstr = mdl.model.getAbstractTokens.contains(note.noteType)
                 )
         }
     }