You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/23 05:39:26 UTC
[incubator-nlpcraft] 02/02: WIP on NLPCRAFT-369.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 4418fa774e166f98e19142ed605a608ba723f5f2
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Jul 22 22:39:12 2021 -0700
WIP on NLPCRAFT-369.
---
.../scala/org/apache/nlpcraft/model/NCToken.java | 9 ++++
.../nlpcraft/model/intent/NCIdlIntentOptions.scala | 5 +++
.../model/intent/compiler/NCIdlCompiler.scala | 23 ++++++++--
.../model/intent/solver/NCIntentSolverEngine.scala | 50 +++++++++++++---------
.../intent/idl/compiler/NCIdlCompilerSpec.scala | 20 +++++++++
5 files changed, 83 insertions(+), 24 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index f6d8937..374e667 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -488,6 +488,15 @@ public interface NCToken extends NCMetadata {
}
/**
+ * Tests whether or not this token is defined by the system, i.e. not by a user-defined model element.
+ *
+ * @return {@code True} if this token is not defined by a user model element, {@code false} otherwise.
+ */
+ default boolean isSystemDefined() {
+ return !isUserDefined();
+ }
+
+ /**
* Whether or not this token is abstract.
* <p>
* An abstract token is only detected when it is either a constituent part of some other non-abstract token
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 3249483..262fa1d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
@@ -37,6 +37,11 @@ class NCIdlIntentOptions {
var ignoreUnusedUserTokens: Boolean = false
/**
+ * Whether or not to allow an intent to match if all of its matching tokens came from STM only.
+ */
+ var allowStmTokenOnly: Boolean = false
+
+ /**
* Whether or not the order of terms is important for intent matching.
*/
var ordered: Boolean = false
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index e5d2653..2066f18 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -98,6 +98,19 @@ object NCIdlCompiler extends LazyLogging {
*/
def getCompiledSynonym: NCIdlSynonym = synonym
+ /**
+ *
+ * @param json
+ * @param ctx
+ * @return
+ */
+ private def json2Obj(json: String)(ctx: ParserRuleContext): Map[String, Object] =
+ try
+ U.jsonToScalaMap(json)
+ catch {
+ case e: Exception => throw newSyntaxError(s"Invalid JSON (${e.getMessage})")(ctx)
+ }
+
/*
* Shared/common implementation.
*/
@@ -110,9 +123,9 @@ object NCIdlCompiler extends LazyLogging {
override def exitCallExpr(ctx: IDP.CallExprContext): Unit = expr += parseCallExpr(ctx.FUN_NAME())(ctx)
override def exitAtom(ctx: IDP.AtomContext): Unit = expr += parseAtom(ctx.getText)(ctx)
override def exitTermEq(ctx: IDP.TermEqContext): Unit = termConv = ctx.TILDA() != null
- override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
- override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
- override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(U.jsonToScalaMap(ctx.jsonObj().getText))(ctx)
+ override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+ override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+ override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(json2Obj(ctx.jsonObj().getText)(ctx))(ctx)
override def exitIntentId(ctx: IDP.IntentIdContext): Unit = intentId = ctx.id().getText
override def exitAlias(ctx: IDP.AliasContext): Unit = alias = ctx.id().getText
@@ -122,7 +135,7 @@ object NCIdlCompiler extends LazyLogging {
def boolVal(k: String, v: Object): Boolean =
v match {
case b: java.lang.Boolean if b != null => b
- case _ => throw newSyntaxError(s"Invalid intent option value: $k")(ctx)
+ case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx)
}
for ((k, v) <- json) {
@@ -134,6 +147,8 @@ object NCIdlCompiler extends LazyLogging {
opts.ignoreUnusedSystemTokens = boolVal(k, v)
else if (k == "unused_user_toks")
opts.ignoreUnusedUserTokens = boolVal(k, v)
+ else if (k == "allow_stm_only")
+ opts.allowStmTokenOnly = boolVal(k, v)
else
throw newSyntaxError(s"Unknown intent option: $k")(ctx)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 65e5280..8612482 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -129,7 +129,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
* @param used
* @param token
*/
- private case class UsedToken(
+ private case class UseToken(
var used: Boolean,
var conv: Boolean,
token: NCToken
@@ -142,7 +142,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
*/
private case class TermMatch(
termId: Option[String],
- usedTokens: List[UsedToken],
+ usedTokens: List[UseToken],
weight: Weight
) {
lazy val maxIndex: Int = usedTokens.maxBy(_.token.index).token.index
@@ -155,7 +155,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
*/
private case class TermTokensGroup(
term: NCIdlTerm,
- usedTokens: List[UsedToken]
+ usedTokens: List[UseToken]
)
/**
@@ -210,13 +210,13 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
val callback = pair._2
// Isolated sentence tokens.
- val senToks = Seq.empty[UsedToken] ++ availToks.map(UsedToken(false, false, _))
+ val senToks = Seq.empty[UseToken] ++ availToks.map(UseToken(false, false, _))
val senTokGroups = availToks.map(t => if (t.getGroups != null) t.getGroups.asScala.sorted else Seq.empty)
// Isolated conversation tokens.
val convToks =
if (intent.terms.exists(_.conv))
- Seq.empty[UsedToken] ++
+ Seq.empty[UseToken] ++
// We shouldn't mix tokens with same group from conversation
// history and processed sentence.
ctx.getConversation.getTokens.asScala.
@@ -225,9 +225,9 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
!senTokGroups.exists(convTokGroups.containsSlice)
}).
- map(UsedToken(used = false, conv = true, _))
+ map(UseToken(used = false, conv = true, _))
else
- Seq.empty[UsedToken]
+ Seq.empty[UseToken]
// Solve intent in isolation.
solveIntent(ctx, intent, senToks, convToks, vrnIdx) match {
@@ -408,15 +408,15 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
* @param convToks
* @return
*/
- //noinspection DuplicatedCode
private def solveIntent(
ctx: NCContext,
intent: NCIdlIntent,
- senToks: Seq[UsedToken],
- convToks: Seq[UsedToken],
+ senToks: Seq[UseToken],
+ convToks: Seq[UseToken],
varIdx: Int
): Option[IntentMatch] = {
val intentId = intent.id
+ val opts = intent.options
val flow = NCDialogFlowManager.getDialogFlow(ctx.getRequest.getUser.getId, ctx.getModel.getId)
val varStr = s"(variant #${varIdx + 1})"
val flowRegex = intent.flowRegex
@@ -479,7 +479,6 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
val intentW = new Weight()
val intentGrps = mutable.ListBuffer.empty[TermTokensGroup]
var abort = false
- val opts = intent.options
var lastTermMatch: TermMatch = null
// Conversation metadata (shared across all terms).
@@ -549,17 +548,28 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
var res: Option[IntentMatch] = None
- if (usedSenToks.isEmpty && usedConvToks.nonEmpty)
+ if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.")
- else if (unusedSenToks.exists(_.token.isUserDefined))
+ else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
+ logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.")
+ else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined))
NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
info(
logger,
Some(
- s"Intent '$intentId' ${bo(r("did not match"))} because of remaining unused user tokens $varStr." +
+ s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." +
s"\nUnused user tokens for intent '$intentId' $varStr:"
)
)
+ else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined))
+ NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
+ info(
+ logger,
+ Some(
+ s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." +
+ s"\nUnused system tokens for intent '$intentId' $varStr:"
+ )
+ )
else {
if (usedSenToks.isEmpty && usedConvToks.isEmpty)
logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.")
@@ -598,8 +608,8 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
private def solveTerm(
term: NCIdlTerm,
ctx: NCIdlContext,
- senToks: Seq[UsedToken],
- convToks: Seq[UsedToken]
+ senToks: Seq[UseToken],
+ convToks: Seq[UseToken]
): Option[TermMatch] = {
if (senToks.isEmpty && convToks.isEmpty)
logger.warn(s"No tokens available to match on for term '${term.toAnsiString}'.")
@@ -648,14 +658,14 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
ctx: NCIdlContext,
min: Int,
max: Int,
- senToks: Seq[UsedToken],
- convToks: Seq[UsedToken]
- ): Option[(List[UsedToken], Weight)] = {
+ senToks: Seq[UseToken],
+ convToks: Seq[UseToken]
+ ): Option[(List[UseToken], Weight)] = {
// Algorithm is "hungry", i.e. it will fetch all tokens satisfying item's predicate
// in entire sentence even if these tokens are separated by other already used tokens
// and conversation will be used only to get to the 'max' number of the item.
- var usedToks = List.empty[UsedToken]
+ var usedToks = List.empty[UseToken]
var matches = 0
var tokUses = 0
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index adb717c..d1a624f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -178,6 +178,26 @@ class NCIdlCompilerSpec {
checkCompileError(
"""
|intent=i1
+ | options={'ordered': null}
+ | flow="a[^0-9]b"
+ | meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+ | term(t1)={2 == 2 && size(tok_id()) != -25}
+ |""".stripMargin
+ )
+
+ checkCompileError(
+ """
+ |intent=i1
+ | options={'ordered': false, 'ordered': true}
+ | flow="a[^0-9]b"
+ | meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+ | term(t1)={2 == 2 && size(tok_id()) != -25}
+ |""".stripMargin
+ )
+
+ checkCompileError(
+ """
+ |intent=i1
|/*
| * +=====================+
| * | block comments......|