You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/02/16 18:17:58 UTC
[incubator-nlpcraft] branch NLPCRAFT-236 updated: Code review +
javadoc update.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-236
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-236 by this push:
new b7802dd Code review + javadoc update.
b7802dd is described below
commit b7802ddbaeaa47f495c43d9954459e2e2b0c763c
Author: Aaron Radzinski <ar...@apache.org>
AuthorDate: Tue Feb 16 10:17:49 2021 -0800
Code review + javadoc update.
---
.../main/scala/org/apache/nlpcraft/model/NCToken.java | 18 +++++++++++++++---
.../org/apache/nlpcraft/model/impl/NCTokenImpl.scala | 10 +++++-----
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index fe7a442..c429f59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -309,12 +309,24 @@ public interface NCToken extends NCMetadata {
String id = getId();
int i = id.indexOf(':');
- return i <=0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
+ return i <= 0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
}
/**
- * TODO:
- * @return
+ * Whether or not this token is abstract.
+ * <p>
+ * An abstract token is only detected when it is either a constituent part of some other non-abstract token
+ * or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
+ * unreferenced position. By default (unless returned by this method), all named entities are considered to be
+ * non-abstract.
+ * <p>
+ * Declaring tokens as abstract is important to minimize the number of parsing variants automatically
+ * generated as permutations of all possible parsing compositions. For example, if it is known that a particular
+ * named entity will only be used as a constituent part of some other token - declaring such named entity as
+ * abstract can significantly reduce the number of parsing variants leading to a better performance,
+ * and often simpler corresponding intent definition and callback logic.
+ *
+ * @return Whether or not this token is abstract.
*/
boolean isAbstract();
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 002e0e0..2a3ea31 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -36,7 +36,7 @@ import scala.collection.{Seq, mutable}
* @param parentId
* @param value
* @param meta
- * @param isAbstractProp
+ * @param isAbstr
*/
private[nlpcraft] class NCTokenImpl(
mdl: NCModelView,
@@ -49,7 +49,7 @@ private[nlpcraft] class NCTokenImpl(
startCharIndex: Int,
endCharIndex: Int,
meta: Map[String, Object],
- isAbstractProp: Boolean
+ isAbstr: Boolean
) extends NCToken with Serializable {
require(mdl != null)
require(srvReqId != null)
@@ -74,8 +74,8 @@ private[nlpcraft] class NCTokenImpl(
override lazy val getStartCharIndex: Int = startCharIndex
override lazy val getEndCharIndex: Int = endCharIndex
override lazy val getAliases: java.util.List[String] = meta(TOK_META_ALIASES_KEY, Collections.emptyList())
- override lazy val isAbstract: Boolean = isAbstractProp
override def getPartTokens: java.util.List[NCToken] = parts.asJava
+ override def isAbstract: Boolean = isAbstr
def setParts(parts: Seq[NCToken]): Unit = this.parts = parts
@@ -155,7 +155,7 @@ private[nlpcraft] object NCTokenImpl {
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
meta = convertMeta(),
- isAbstractProp = mdl.model.getAbstractTokens.contains(elm.getId)
+ isAbstr = mdl.model.getAbstractTokens.contains(elm.getId)
)
case None ⇒
@@ -179,7 +179,7 @@ private[nlpcraft] object NCTokenImpl {
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
meta = convertMeta(),
- isAbstractProp = mdl.model.getAbstractTokens.contains(note.noteType)
+ isAbstr = mdl.model.getAbstractTokens.contains(note.noteType)
)
}
}