You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/23 05:39:24 UTC

[incubator-nlpcraft] branch NLPCRAFT-369 updated (4ef9410 -> 4418fa7)

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a change to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 4ef9410  WIP on NLPCRAFT-369
     new ff7dcd3  WIP on NLPCRAFT-369.
     new 4418fa7  WIP on NLPCRAFT-369.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../scala/org/apache/nlpcraft/model/NCToken.java   |  9 +++
 .../nlpcraft/model/intent/NCIdlIntentOptions.scala |  5 ++
 .../model/intent/compiler/NCIdlCompiler.scala      | 29 ++++++---
 .../model/intent/solver/NCIntentSolverEngine.scala | 50 +++++++++-------
 .../intent/idl/compiler/NCIdlCompilerSpec.scala    | 68 +++++++++++++++++++++-
 5 files changed, 133 insertions(+), 28 deletions(-)

[incubator-nlpcraft] 01/02: WIP on NLPCRAFT-369.

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit ff7dcd32b6bde70568923bc2abfd6c16eaa56fec
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Jul 22 14:07:05 2021 -0700

    WIP on NLPCRAFT-369.
---
 .../model/intent/compiler/NCIdlCompiler.scala      |  8 ++--
 .../intent/idl/compiler/NCIdlCompilerSpec.scala    | 48 +++++++++++++++++++++-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index 7ba490c..e5d2653 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -122,17 +122,17 @@ object NCIdlCompiler extends LazyLogging {
             def boolVal(k: String, v: Object): Boolean =
                 v match {
                     case b: java.lang.Boolean if b != null => b
-                    case _ => throw newSyntaxError(s"Invalid intent option: $k")(ctx)
+                    case _ => throw newSyntaxError(s"Invalid intent option value: $k")(ctx)
                 }
 
             for ((k, v) <- json) {
                 if (k == "ordered")
                     opts.ordered = boolVal(k, v)
-                if (k == "unused_free_words")
+                else if (k == "unused_free_words")
                     opts.ignoreUnusedFreeWords = boolVal(k, v)
-                if (k == "unused_sys_toks")
+                else if (k == "unused_sys_toks")
                     opts.ignoreUnusedSystemTokens = boolVal(k, v)
-                if (k == "unused_user_toks")
+                else if (k == "unused_user_toks")
                     opts.ignoreUnusedUserTokens = boolVal(k, v)
                 else
                     throw newSyntaxError(s"Unknown intent option: $k")(ctx)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index 6fbc15a..adb717c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -101,6 +101,10 @@ class NCIdlCompilerSpec {
               | * +=====================+
               | */
               |intent=i1
+              |     options={
+              |         'ordered': true,
+              |         'unused_free_words': false
+              |     }
               |     flow="a[^0-9]b" // Flow comment.
               |     term(t1)={has(json("{'a': true, 'b\'2': {'arr': [1, 2, 3]}}"), list("موسكو\"", 'v1\'v1', "k2", "v2"))}
               |     fragment(f1, {'a': true, 'b': ["s1", "s2"]}) /* Another fragment. */
@@ -118,6 +122,7 @@ class NCIdlCompilerSpec {
               |     term~/class#method/
               |
               |intent=i1
+              |     options={}
               |     flow="a[^0-9]b"
               |     term(t1)={has(json("{'a': true, 'b\'2': {'arr': [1, 2, 3]}}"), list("موسكو\"", 'v1\'v1', "k2", "v2"))}
               |     fragment(f21, {'a': true, 'b': ["s1", "s2"]})
@@ -129,7 +134,47 @@ class NCIdlCompilerSpec {
     @throws[NCException]
     def testInlineCompileFail(): Unit = {
         NCIdlCompilerGlobal.clearCache(MODEL_ID)
-        
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered': 1}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered1': false}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered': false, 'unknown': 1}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered': false_1} # Broken JSON.
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
         checkCompileError(
             """
               |intent=i1
@@ -228,6 +273,7 @@ class NCIdlCompilerSpec {
         checkCompileError(
             """
               |fragment=f111
+              |     options={'ordered': 1}
               |     term(t1)={2==2}
               |     term~/class#method/
               |

[incubator-nlpcraft] 02/02: WIP on NLPCRAFT-369.

Posted by ar...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-369
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 4418fa774e166f98e19142ed605a608ba723f5f2
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Jul 22 22:39:12 2021 -0700

    WIP on NLPCRAFT-369.
---
 .../scala/org/apache/nlpcraft/model/NCToken.java   |  9 ++++
 .../nlpcraft/model/intent/NCIdlIntentOptions.scala |  5 +++
 .../model/intent/compiler/NCIdlCompiler.scala      | 23 ++++++++--
 .../model/intent/solver/NCIntentSolverEngine.scala | 50 +++++++++++++---------
 .../intent/idl/compiler/NCIdlCompilerSpec.scala    | 20 +++++++++
 5 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index f6d8937..374e667 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -488,6 +488,15 @@ public interface NCToken extends NCMetadata {
     }
 
     /**
+     * Tests whether this token is system-defined, i.e. it is not for a user-defined model element.
+     *
+     * @return {@code true} if this token is not defined by a user model element, {@code false} otherwise.
+     */
+    default boolean isSystemDefined() {
+        return !isUserDefined();
+    }
+
+    /**
      * Whether or not this token is abstract.
      * <p>
      * An abstract token is only detected when it is either a constituent part of some other non-abstract token
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
index 3249483..262fa1d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/NCIdlIntentOptions.scala
@@ -37,6 +37,11 @@ class NCIdlIntentOptions {
     var ignoreUnusedUserTokens: Boolean = false
 
     /**
+     * Whether or not to allow intent to match if all matching tokens came from STM only.
+     */
+    var allowStmTokenOnly: Boolean = false
+
+    /**
      * Whether or not the order of term is important for intent match.
      */
     var ordered: Boolean = false
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
index e5d2653..2066f18 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/compiler/NCIdlCompiler.scala
@@ -98,6 +98,19 @@ object NCIdlCompiler extends LazyLogging {
          */
         def getCompiledSynonym: NCIdlSynonym = synonym
 
+        /**
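+         * Parses given JSON string into a map, reporting any parsing failure as a syntax error.
+         * @param json JSON string to parse.
+         * @param ctx Parser rule context the syntax error is reported against.
+         * @return Map parsed from the given JSON string.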
+         */
+        private def json2Obj(json: String)(ctx: ParserRuleContext): Map[String, Object] =
+            try
+                U.jsonToScalaMap(json)
+            catch {
+                case e: Exception => throw newSyntaxError(s"Invalid JSON (${e.getMessage})")(ctx)
+            }
+
         /*
          * Shared/common implementation.
          */
@@ -110,9 +123,9 @@ object NCIdlCompiler extends LazyLogging {
         override def exitCallExpr(ctx: IDP.CallExprContext): Unit = expr += parseCallExpr(ctx.FUN_NAME())(ctx)
         override def exitAtom(ctx: IDP.AtomContext): Unit = expr += parseAtom(ctx.getText)(ctx)
         override def exitTermEq(ctx: IDP.TermEqContext): Unit = termConv = ctx.TILDA() != null
-        override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
-        override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = U.jsonToScalaMap(ctx.jsonObj().getText)
-        override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(U.jsonToScalaMap(ctx.jsonObj().getText))(ctx)
+        override def exitFragMeta(ctx: IDP.FragMetaContext): Unit = fragMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+        override def exitMetaDecl(ctx: IDP.MetaDeclContext): Unit = intentMeta = json2Obj(ctx.jsonObj().getText)(ctx)
+        override def exitOptDecl (ctx: IDP.OptDeclContext): Unit = intentOpts = convertToOptions(json2Obj(ctx.jsonObj().getText)(ctx))(ctx)
         override def exitIntentId(ctx: IDP.IntentIdContext): Unit =  intentId = ctx.id().getText
         override def exitAlias(ctx: IDP.AliasContext): Unit = alias = ctx.id().getText
 
@@ -122,7 +135,7 @@ object NCIdlCompiler extends LazyLogging {
             def boolVal(k: String, v: Object): Boolean =
                 v match {
                     case b: java.lang.Boolean if b != null => b
-                    case _ => throw newSyntaxError(s"Invalid intent option value: $k")(ctx)
+                    case _ => throw newSyntaxError(s"Expecting boolean value for intent option: $k")(ctx)
                 }
 
             for ((k, v) <- json) {
@@ -134,6 +147,8 @@ object NCIdlCompiler extends LazyLogging {
                     opts.ignoreUnusedSystemTokens = boolVal(k, v)
                 else if (k == "unused_user_toks")
                     opts.ignoreUnusedUserTokens = boolVal(k, v)
+                else if (k == "allow_stm_only")
+                    opts.allowStmTokenOnly = boolVal(k, v)
                 else
                     throw newSyntaxError(s"Unknown intent option: $k")(ctx)
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
index 65e5280..8612482 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolverEngine.scala
@@ -129,7 +129,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      * @param used
      * @param token
      */
-    private case class UsedToken(
+    private case class UseToken(
         var used: Boolean,
         var conv: Boolean,
         token: NCToken
@@ -142,7 +142,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      */
     private case class TermMatch(
         termId: Option[String],
-        usedTokens: List[UsedToken],
+        usedTokens: List[UseToken],
         weight: Weight
     ) {
         lazy val maxIndex: Int = usedTokens.maxBy(_.token.index).token.index
@@ -155,7 +155,7 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      */
     private case class TermTokensGroup(
         term: NCIdlTerm,
-        usedTokens: List[UsedToken]
+        usedTokens: List[UseToken]
     )
 
     /**
@@ -210,13 +210,13 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
                         val callback = pair._2
 
                         // Isolated sentence tokens.
-                        val senToks = Seq.empty[UsedToken] ++ availToks.map(UsedToken(false, false, _))
+                        val senToks = Seq.empty[UseToken] ++ availToks.map(UseToken(false, false, _))
                         val senTokGroups = availToks.map(t => if (t.getGroups != null) t.getGroups.asScala.sorted else Seq.empty)
 
                         // Isolated conversation tokens.
                         val convToks =
                             if (intent.terms.exists(_.conv))
-                                Seq.empty[UsedToken] ++
+                                Seq.empty[UseToken] ++
                                     // We shouldn't mix tokens with same group from conversation
                                     // history and processed sentence.
                                     ctx.getConversation.getTokens.asScala.
@@ -225,9 +225,9 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
 
                                             !senTokGroups.exists(convTokGroups.containsSlice)
                                         }).
-                                        map(UsedToken(used = false, conv = true, _))
+                                        map(UseToken(used = false, conv = true, _))
                             else
-                                Seq.empty[UsedToken]
+                                Seq.empty[UseToken]
 
                         // Solve intent in isolation.
                         solveIntent(ctx, intent, senToks, convToks, vrnIdx) match {
@@ -408,15 +408,15 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
      * @param convToks
      * @return
      */
-    //noinspection DuplicatedCode
     private def solveIntent(
         ctx: NCContext,
         intent: NCIdlIntent,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken],
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken],
         varIdx: Int
     ): Option[IntentMatch] = {
         val intentId = intent.id
+        val opts = intent.options
         val flow = NCDialogFlowManager.getDialogFlow(ctx.getRequest.getUser.getId, ctx.getModel.getId)
         val varStr = s"(variant #${varIdx + 1})"
         val flowRegex = intent.flowRegex
@@ -479,7 +479,6 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
             val intentW = new Weight()
             val intentGrps = mutable.ListBuffer.empty[TermTokensGroup]
             var abort = false
-            val opts = intent.options
             var lastTermMatch: TermMatch = null
 
             // Conversation metadata (shared across all terms).
@@ -549,17 +548,28 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
 
                 var res: Option[IntentMatch] = None
 
-                if (usedSenToks.isEmpty && usedConvToks.nonEmpty)
+                if (!opts.allowStmTokenOnly && usedSenToks.isEmpty && usedConvToks.nonEmpty)
                     logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because all its matched tokens came from STM $varStr.")
-                else if (unusedSenToks.exists(_.token.isUserDefined))
+                else if (!opts.ignoreUnusedFreeWords && unusedSenToks.exists(_.token.isFreeWord))
+                    logger.info(s"Intent '$intentId' ${bo(r("did not match"))} because of unused free words $varStr.")
+                else if (!opts.ignoreUnusedUserTokens && unusedSenToks.exists(_.token.isUserDefined))
                     NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isUserDefined).map(_.token)).
                         info(
                             logger,
                             Some(
-                                s"Intent '$intentId' ${bo(r("did not match"))} because of remaining unused user tokens $varStr." +
+                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused user tokens $varStr." +
                                 s"\nUnused user tokens for intent '$intentId' $varStr:"
                             )
                         )
+                else if (!opts.ignoreUnusedSystemTokens && unusedSenToks.exists(_.token.isSystemDefined))
+                    NCTokenLogger.prepareTable(unusedSenToks.filter(_.token.isSystemDefined).map(_.token)).
+                        info(
+                            logger,
+                            Some(
+                                s"Intent '$intentId' ${bo(r("did not match"))} because of unused system tokens $varStr." +
+                                s"\nUnused system tokens for intent '$intentId' $varStr:"
+                            )
+                        )
                 else {
                     if (usedSenToks.isEmpty && usedConvToks.isEmpty)
                         logger.warn(s"Intent '$intentId' ${bo(y("matched"))} but no tokens were used $varStr.")
@@ -598,8 +608,8 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
     private def solveTerm(
         term: NCIdlTerm,
         ctx: NCIdlContext,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken]
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken]
     ): Option[TermMatch] = {
         if (senToks.isEmpty && convToks.isEmpty)
             logger.warn(s"No tokens available to match on for term '${term.toAnsiString}'.")
@@ -648,14 +658,14 @@ object NCIntentSolverEngine extends LazyLogging with NCOpenCensusTrace {
         ctx: NCIdlContext,
         min: Int,
         max: Int,
-        senToks: Seq[UsedToken],
-        convToks: Seq[UsedToken]
-    ): Option[(List[UsedToken], Weight)] = {
+        senToks: Seq[UseToken],
+        convToks: Seq[UseToken]
+    ): Option[(List[UseToken], Weight)] = {
         // Algorithm is "hungry", i.e. it will fetch all tokens satisfying item's predicate
         // in entire sentence even if these tokens are separated by other already used tokens
         // and conversation will be used only to get to the 'max' number of the item.
 
-        var usedToks = List.empty[UsedToken]
+        var usedToks = List.empty[UseToken]
 
         var matches = 0
         var tokUses = 0
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index adb717c..d1a624f 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -178,6 +178,26 @@ class NCIdlCompilerSpec {
         checkCompileError(
             """
               |intent=i1
+              |     options={'ordered': null}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
+              |     options={'ordered': false, 'ordered': true}
+              |     flow="a[^0-9]b"
+              |     meta={'a': true, 'b': {'Москва': [1, 2, 3]}}
+              |     term(t1)={2 == 2 && size(tok_id()) != -25}
+              |""".stripMargin
+        )
+
+        checkCompileError(
+            """
+              |intent=i1
               |/*
               | * +=====================+
               | * | block comments......|