You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/23 05:39:26 UTC

[incubator-nlpcraft] 02/02: WIP on NLPCRAFT-369.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 4418fa774e166f98e19142ed605a608ba723f5f2
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Jul 22 22:39:12 2021 -0700

    WIP on NLPCRAFT-369.
---
 .../scala/org/apache/nlpcraft/model/NCToken.java   |  9 ++++
 .../nlpcraft/model/intent/NCIdlIntentOptions.scala |  5 +++
 .../model/intent/compiler/NCIdlCompiler.scala      | 23 ++++++++--
 .../model/intent/solver/NCIntentSolverEngine.scala | 50 +++++++++++++---------
 .../intent/idl/compiler/NCIdlCompilerSpec.scala    | 20 +++++++++
 5 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index f6d8937..374e667 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -488,6 +488,15 @@ public interface NCToken extends NCMetadata {
     }
 
     /**
+     * Tests whether or not this token is system-defined, i.e. not defined by a user model element.
+     *
+     * @return {@code True} if this token is not defined by a user model element, {@code false} otherwise.
+     */
+    default boolean isSystemDefined() {
+        return !isUserDefined();
+    }
+
+    /**
      * Whether or not this token is abstract.
      * <p>
      * An abstract token is only detected when it is either a constituent part of some other non-abstract token
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 3249483..262fa1d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
@@ -37,6 +37,11 @@ class NCIdlIntentOptions {
     var ignoreUnusedUserTokens: Boolean = false
 
     /**
+     * Whether or not to allow the intent to match if all of its matching tokens came from STM only.
+     */
+    var allowStmTokenOnly: Boolean = false
+
+    /**
      * Whether or not the order of term is important for intent match.
      */
     var ordered: Boolean = false
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index e5d2653..2066f18 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -98,6 +98,19 @@ object NCIdlCompiler extends LazyLogging {
          */
         def getCompiledSynonym: NCIdlSynonym = synonym
 
+        /**
+         *
+         * @param json
+         * @param ctx
+         * @return
+         */
+        private def json2Obj(json: String)(ctx: ParserRuleContext): Map[String, Object] =
+            try
+                U.jsonToScalaMap(json)
+            catch {
+                case e: Exception => throw newSyntaxError(s"Invalid JSON (${e.getMessage})")(ctx)
+            }
+
         /*
          * Shared/common implementation.
          */
@@ -110,9 +123,9 @@ object NCIdlCompiler extends LazyLogging {
         override def exitCallExpr(ctx: IDP.CallExprContext): Unit = expr += parseCallExpr(ctx.FUN_NAME())(ctx)
         override def exitAtom(ctx: IDP.AtomContext): Unit = expr += parseAtom(ctx.getText)(ctx)
         override def exitTermEq(ctx: IDP.TermEqContext): Unit = termConv = ctx.TILDA() != null
-        override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
-        override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
-        override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(U.jsonToScalaMap(ctx.jsonObj().getText))(ctx)
+        override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+        override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+        override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(json2Obj(ctx.jsonObj().getText)(ctx))(ctx)
         override def exitIntentId(ctx: IDP.IntentIdContext): Unit =  intentId = ctx.id().getText
         override def exitAlias(ctx: IDP.AliasContext): Unit = alias = ctx.id().getText
 
@@ -122,7 +135,7 @@ object NCIdlCompiler extends LazyLogging {
             def boolVal(k: String, v: Object): Boolean =
                 v match {
                     case b: java.lang.Boolean if b != null => b
-                    case _ => throw newSyntaxError(s"Invalid intent option value: $k")(ctx)
+                    case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx)
                 }
 
             for ((k, v) <- json) {
@@ -134,6 +147,8 @@ object NCIdlCompiler extends LazyLogging {
                     opts.ignoreUnusedSystemTokens = boolVal(k, v)
                 else if (k == "unused_user_toks")
                     opts.ignoreUnusedUserTokens = boolVal(k, v)
+                else if (k == "allow_stm_only")
+                    opts.allowStmTokenOnly = boolVal(k, v)
                 else
                     throw newSyntaxError(s"Unknown intent option: $k")(ctx)
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 65e5280..8612482 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -129,7 +129,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      * @param used
      * @param token
      */
-    private case class UsedToken(
+    private case class UseToken(
         var used: Boolean,
         var conv: Boolean,
         token: NCToken
@@ -142,7 +142,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      */
     private case class TermMatch(
         termId: Option[String],
-        usedTokens: List[UsedToken],
+        usedTokens: List[UseToken],
         weight: Weight
     ) {
         lazy val maxIndex: Int = usedTokens.maxBy(_.token.index).token.index
@@ -155,7 +155,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      */
     private case class TermTokensGroup(
         term: NCIdlTerm,
-        usedTokens: List[UsedToken]
+        usedTokens: List[UseToken]
     )
 
     /**
@@ -210,13 +210,13 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
                         val callback = pair._2
 
                         // Isolated sentence tokens.
-                        val senToks = Seq.empty[UsedToken] ++ availToks.map(UsedToken(false, false, _))
+                        val senToks = Seq.empty[UseToken] ++ availToks.map(UseToken(false, false, _))
                         val senTokGroups = availToks.map(t => if (t.getGroups != null) t.getGroups.asScala.sorted else Seq.empty)
 
                         // Isolated conversation tokens.
                         val convToks =
                             if (intent.terms.exists(_.conv))
-                                Seq.empty[UsedToken] ++
+                                Seq.empty[UseToken] ++
                                     // We shouldn't mix tokens with same group from conversation
                                     // history and processed sentence.
                                     ctx.getConversation.getTokens.asScala.
@@ -225,9 +225,9 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
 
                                             !senTokGroups.exists(convTokGroups.containsSlice)
                                         }).
-                                        map(UsedToken(used = false, conv = true, _))
+                                        map(UseToken(used = false, conv = true, _))
                             else
-                                Seq.empty[UsedToken]
+                                Seq.empty[UseToken]
 
                         // Solve intent in isolation.
                         solveIntent(ctx, intent, senToks, convToks, vrnIdx) match {
@@ -408,15 +408,15 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      * @param convToks
      * @return
      */
-    //noinspection DuplicatedCode
     private def solveIntent(
         ctx: NCContext,
         intent: NCIdlIntent,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken],
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken],
         varIdx: Int
     ): Option[IntentMatch] = {
         val intentId = intent.id
+        val opts = intent.options
         val flow = NCDialogFlowManager.getDialogFlow(ctx.getRequest.getUser.getId, ctx.getModel.getId)
         val varStr = s"(variant #${varIdx + 1})"
         val flowRegex = intent.flowRegex
@@ -479,7 +479,6 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
             val intentW = new Weight()
             val intentGrps = mutable.ListBuffer.empty[TermTokensGroup]
             var abort = false
-            val opts = intent.options
             var lastTermMatch: TermMatch = null
 
             // Conversation metadata (shared across all terms).
@@ -549,17 +548,28 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
 
                 var res: Option[IntentMatch] = None
 
-                if (usedSenToks.isEmpty && usedConvToks.nonEmpty)
+                if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
                     logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.")
-                else if (unusedSenToks.exists(_.token.isUserDefined))
+                else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
+                    logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.")
+                else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined))
                     NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
                         info(
                             logger,
                             Some(
-                                s"Intent '$intentId' ${bo(r("did not match"))} because of remaining unused user tokens $varStr." +
+                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." +
                                 s"\nUnused user tokens for intent '$intentId' $varStr:"
                             )
                         )
+                else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined))
+                    NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
+                        info(
+                            logger,
+                            Some(
+                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." +
+                                s"\nUnused system tokens for intent '$intentId' $varStr:"
+                            )
+                        )
                 else {
                     if (usedSenToks.isEmpty && usedConvToks.isEmpty)
                         logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.")
@@ -598,8 +608,8 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
     private def solveTerm(
         term: NCIdlTerm,
         ctx: NCIdlContext,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken]
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken]
     ): Option[TermMatch] = {
         if (senToks.isEmpty && convToks.isEmpty)
             logger.warn(s"No tokens available to match on for term '${term.toAnsiString}'.")
@@ -648,14 +658,14 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
         ctx: NCIdlContext,
         min: Int,
         max: Int,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken]
-    ): Option[(List[UsedToken], Weight)] = {
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken]
+    ): Option[(List[UseToken], Weight)] = {
         // Algorithm is "hungry", i.e. it will fetch all tokens satisfying item's predicate
         // in entire sentence even if these tokens are separated by other already used tokens
         // and conversation will be used only to get to the 'max' number of the item.
 
-        var usedToks = List.empty[UsedToken]
+        var usedToks = List.empty[UseToken]
 
         var matches = 0
         var tokUses = 0
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index adb717c..d1a624f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -178,6 +178,26 @@ class NCIdlCompilerSpec {
         checkCompileError(
             """
               |intent=i1
+              |     options={'ordered': null}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered': false, 'ordered': true}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
               |/*
               | * +=====================+
               | * | block comments......|