You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/09 02:01:48 UTC

[incubator-nlpcraft] branch NLPCRAFT-359 updated: WIP on NLPCRAFT-359

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-359
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-359 by this push:
     new 9703eb2  WIP on NLPCRAFT-359
9703eb2 is described below

commit 9703eb2cb0afadee643f3b3e7440aebdff3042e6
Author: Aaron Radzinzski <ar...@datalingvo.com>
AuthorDate: Thu Jul 8 19:01:35 2021 -0700

    WIP on NLPCRAFT-359
---
 .../scala/org/apache/nlpcraft/model/NCIntent.java  |  1 +
 .../org/apache/nlpcraft/model/NCIntentMatch.java   |  1 +
 .../org/apache/nlpcraft/model/NCIntentRef.java     |  1 +
 .../apache/nlpcraft/model/NCIntentSampleRef.java   | 72 +++++++++++++++++++++-
 .../org/apache/nlpcraft/model/NCIntentSkip.java    |  1 +
 .../org/apache/nlpcraft/model/NCIntentTerm.java    |  1 +
 .../model/tools/cmdline/NCCliCommands.scala        |  8 +--
 .../model/tools/test/NCTestAutoModelValidator.java | 23 +++----
 .../nlpcraft/model/tools/test/package-info.java    |  7 ++-
 .../server/sugsyn/NCSuggestSynonymManager.scala    |  2 +-
 nlpcraft/src/test/resources/samples.txt            | 20 ++++++
 .../apache/nlpcraft/model/NCIntentSampleSpec.scala |  4 +-
 openapi/nlpcraft_swagger.yml                       |  2 +-
 13 files changed, 119 insertions(+), 24 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
index 3956e3a..5900bf3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
@@ -35,6 +35,7 @@ import static java.lang.annotation.RetentionPolicy.*;
  * @see NCIntentRef
  * @see NCIntentTerm
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntentSkip
  * @see NCIntentMatch
  * @see NCModel#onMatchedIntent(NCIntentMatch) 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
index 50813ae..a8776c9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
@@ -33,6 +33,7 @@ import java.util.*;
  * @see NCIntentTerm
  * @see NCIntentSkip
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntentRef
  */
 public interface NCIntentMatch extends NCMetadata, Serializable {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
index 25963a7..7b8c481 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
@@ -33,6 +33,7 @@ import static java.lang.annotation.RetentionPolicy.RUNTIME;
  * @see NCIntent
  * @see NCIntentTerm
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntentSkip
  * @see NCIntentMatch
  * @see NCModel#onMatchedIntent(NCIntentMatch)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
index 19d236b..63cbf25 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
@@ -17,6 +17,8 @@
 
 package org.apache.nlpcraft.model;
 
+import org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator;
+
 import java.lang.annotation.Documented;
 import java.lang.annotation.Repeatable;
 import java.lang.annotation.Retention;
@@ -26,16 +28,84 @@ import java.lang.annotation.Target;
 import static java.lang.annotation.ElementType.METHOD;
 import static java.lang.annotation.RetentionPolicy.RUNTIME;
 
-// TODO:
+/**
+ * Annotation to define samples of the user input that should match an intent. This
+ * annotation allows loading these samples from external sources such as a local file or URL and
+ * should be used together with {@link NCIntent} or {@link NCIntentRef} annotations on the callback
+ * methods. A method can have multiple annotations of this type and each annotation can define multiple input
+ * examples. See the similar {@link NCIntentSample} annotation that allows defining samples in place.
+ * <p>
+ * The corpus of intent samples serves several important roles in NLPCraft:
+ * <ul>
+ *     <li>
+ *          It provides code level documentation on what type of user input a given intent is supposed to match on.
+ *          In many cases having {@link NCIntent} and {@link NCIntentSample} annotations on the intent callback
+ *          method allows to see all the main ingredients of the language comprehension in one place.
+ *     </li>
+ *     <li>
+ *         It provides a necessary corpus for automated unit and regression testing used by
+ *         {@link NCTestAutoModelValidator} class from
+ *         <a href="https://nlpcraft.apache.org/tools/test_framework.html">built-in test framework</a>.
+ *         This class auto-validates that provided samples are matched on by their corresponding intents.
+ *     </li>
+ *     <li>
+ *         This corpus is used by various statistical tools like
+ *         <a href="https://nlpcraft.apache.org/tools/syn_tool.html">synonyms tool</a> and category value enrichment. Both
+ *         of these tools utilize Google's BERT and Facebook's fasttext models and require at least a minimal corpus of
+ *         samples for each intent.
+ *     </li>
+ * </ul>
+ * <p>
+ * Here's an example of using this annotation:
+ * <pre class="brush: java, highlight: [2]">
+ * {@literal @}NCIntentRef("alarm")
+ * {@literal @}NCIntentSampleRef("alarm_samples.txt")
+ * NCResult onMatch(
+ *      NCIntentMatch ctx,
+ *      {@literal @}NCIntentTerm("nums") List&lt;NCToken&gt; numToks
+ * ) {
+ *     ...
+ * }
+ * </pre>
+ * <p>
+ * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html">Intent Matching</a> section and review
+ * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>.
+ *
+ * @see NCIntentSample
+ * @see NCIntent
+ * @see NCIntentRef
+ * @see NCIntentTerm
+ * @see NCIntentSkip
+ * @see NCIntentMatch
+ * @see NCModel#onMatchedIntent(NCIntentMatch)
+ * @see NCTestAutoModelValidator
+ */
 @Retention(value=RUNTIME)
 @Target(value=METHOD)
 @Repeatable(NCIntentSampleRef.NCIntentSampleList.class)
 public @interface NCIntentSampleRef {
+    /**
+     * Local file path, classpath resource path or URL supported by {@link java.net.URL} class. The content of the source
+     * should be a new-line separated list of strings. Empty strings and strings starting with the '#' (hash) symbol will
+     * be ignored. This annotation should be attached to the intent callback method. Note that using this annotation is equivalent
+     * to using {@link NCIntentSample} annotation and listing all of its samples in place instead of an external source.
+     *
+     * @return Local file path, classpath resource path or URL supported by {@link java.net.URL} class.
+     */
     String value();
+
+    /**
+     * Grouping annotation required when more than one {@link NCIntentSampleRef} annotation is used.
+     */
     @Retention(RetentionPolicy.RUNTIME)
     @Target(value=METHOD)
     @Documented
     @interface NCIntentSampleList {
+        /**
+         * Gets the list of all {@link NCIntentSampleRef} annotations attached to the callback.
+         *
+         * @return List of all {@link NCIntentSampleRef} annotations attached to the callback.
+         */
         NCIntentSampleRef[] value();
     }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
index 96fa3a0..2bff30f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
@@ -37,6 +37,7 @@ import org.apache.nlpcraft.common.*;
  * @see NCIntentTerm
  * @see NCIntentRef
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntentMatch
  * @see NCModel#onMatchedIntent(NCIntentMatch)
  */
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
index 7f548c0..9d6a16c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
@@ -39,6 +39,7 @@ import static java.lang.annotation.RetentionPolicy.*;
  * @see NCIntent
  * @see NCIntentRef
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntentSkip
  * @see NCIntentMatch
  * @see NCModel#onMatchedIntent(NCIntentMatch)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
index 0d50408..0dcf558 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
@@ -844,8 +844,8 @@ private [cmdline] object NCCliCommands {
             synopsis = s"Runs ${y("'NCTestAutoModelValidator'")} model auto-validator.",
             desc = Some(
                 s"Auto-validation consists " +
-                s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} annotations and their corresponding " +
-                s"callback methods, submitting each sample input sentences from ${y("'NCIntentSample'")} annotation and " +
+                s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} and ${y("'NCIntentSampleRef'")} annotations and their corresponding " +
+                s"callback methods, submitting each sample input sentences from these annotation and " +
                 s"checking that resulting intent matches the intent the sample was attached to. " +
                 s"See more details at https://nlpcraft.apache.org/tools/test_framework.html"
             ),
@@ -918,8 +918,8 @@ private [cmdline] object NCCliCommands {
             desc = Some(
                 s"Re-runs mode auto-validator with the same parameters as the last run. Works only in REPL mode. " +
                 s"Auto-validation consists " +
-                s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} annotations and their corresponding " +
-                s"callback methods, submitting each sample input sentences from ${y("'NCIntentSample'")} annotation and " +
+                s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} or ${y("'NCIntentSampleRef'")} annotations and their corresponding " +
+                s"callback methods, submitting each sample input sentences from these annotation and " +
                 s"checking that resulting intent matches the intent the sample was attached to. " +
                 s"See more details at https://nlpcraft.apache.org/tools/test_framework.html"
             ),
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
index 67b49d7..4648cac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.model.*;
 import org.apache.nlpcraft.model.tools.test.impl.*;
 
 /**
- * Data model auto-validator is based on {@link NCIntentSample} annotations. Validation consists of starting an embedded
- * probe, scanning all deployed models for {@link NCIntentSample} annotations and their corresponding callback methods,
- * submitting each sample input sentences from {@link NCIntentSample} annotation and checking that resulting intent
- * matches the intent the sample was attached to.
+ * Data model auto-validator is based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations. Validation
+ * consists of starting an embedded probe, scanning all deployed models for these annotations and their
+ * corresponding callback methods, submitting each sample input sentence from the annotation and checking that
+ * resulting intent matches the intent the sample was attached to.
  * <p>
- * Note that there can be more than one {@link NCIntentSample} annotation attached to the intent callback. Each such
- * annotation will trigger conversation STM reset before its samples will be submitted. This gives an opportunity
- * to test samples both with and without conversational context as well as the same sample but with multiple different
- * conversation contexts.
+ * Note that there can be more than one {@link NCIntentSample} or {@link NCIntentSampleRef} annotation attached to
+ * the intent callback. Each such annotation will trigger a conversation STM reset before its samples are submitted.
+ * This gives an opportunity to test samples both with and without conversational context as well as the same
+ * sample but with multiple different conversation contexts.
  * <p>
  * This class can be used in two modes:
  * <ul>
@@ -50,6 +50,7 @@ import org.apache.nlpcraft.model.tools.test.impl.*;
  * for usage of model auto-validator.
  * 
  * @see NCIntentSample
+ * @see NCIntentSampleRef
  * @see NCIntent
  * @see NCIntentRef
  */
@@ -58,7 +59,7 @@ public class NCTestAutoModelValidator {
     public final static String PROP_MODELS = "NLPCRAFT_TEST_MODELS";
 
     /**
-     * Performs validation based on {@link NCIntentSample} annotations.
+     * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations.
      * <p>
      * This is an entry point for a standalone application that expects two system properties (both optional):
      * <ul>
@@ -84,7 +85,7 @@ public class NCTestAutoModelValidator {
     }
 
     /**
-     * Performs validation based on {@link NCIntentSample} annotations.
+     * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations.
      * <p>
      * This method accepts two system properties (both optional):
      * <ul>
@@ -111,7 +112,7 @@ public class NCTestAutoModelValidator {
     }
 
     /**
-     * Performs validation based on {@link NCIntentSample} annotations for given model.
+     * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations for given model.
      * <p>
      * This is a convenient shortcut that is equivalent to setting <code>NLPCRAFT_TEST_MODELS</code> system
      * property (overriding any existing value) with given mode class name and calling {@link #isValid()} method.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
index 14a9948..4135586 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
@@ -58,10 +58,11 @@
  * </pre>
  * <p>
  * You can also automatically <b>verify</b> the same model by using {@link org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator} class without any
- * additional coding utilizing {@link org.apache.nlpcraft.model.NCIntentSample} annotation on the models' callback method.
+ * additional coding utilizing {@link org.apache.nlpcraft.model.NCIntentSample} or
+ * {@link org.apache.nlpcraft.model.NCIntentSampleRef} annotation on the models' callback method.
  * This automatic model validation consists of starting an embedded probe with a given model, scanning
- * for {@link org.apache.nlpcraft.model.NCIntentSample} annotations and their corresponding callback methods,
- * submitting each sample input sentences from {@link org.apache.nlpcraft.model.NCIntentSample} annotation and checking
+ * for these annotations and their corresponding callback methods,
+ * submitting each sample input sentence from the annotations and checking
  * that resulting intent matches the intent the sample was attached to.
  * <p>
  * Add necessary classpath and run the following command:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index 02366ba..76af8a0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -266,7 +266,7 @@ object NCSuggestSynonymManager extends NCService {
                                     map { case (elemId, _) => elemId }
 
                             if (noExElems.nonEmpty)
-                                warns += s"Elements do not have *single word* synonyms in their @NCIntentSample - " +
+                                warns += s"Elements do not have *single word* synonyms in their @NCIntentSample or @NCIntentSampleRef annotations - " +
                                     s"no suggestion can be made: ${noExElems.mkString(", ")}"
 
                             val allReqsCnt = allReqs.map(_._2.size).sum
diff --git a/nlpcraft/src/test/resources/samples.txt b/nlpcraft/src/test/resources/samples.txt
new file mode 100644
index 0000000..4c302c5
--- /dev/null
+++ b/nlpcraft/src/test/resources/samples.txt
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+x1
+x2
+x3
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
index 5dfcc9c..4857f40 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
@@ -39,9 +39,7 @@ class NCIntentSampleSpecModel extends NCModelAdapter(
     @NCIntentSample(Array("unknown", "unknown"))
     private def onX1(ctx: NCIntentMatch): NCResult = "OK"
 
-    @NCIntentSample(Array("x1", "x2", "x3"))
-    @NCIntentSample(Array("x1", "x2"))
-    @NCIntentSample(Array("x1"))
+    @NCIntentSampleRef("samples.txt")
     @NCIntent("intent=intent2 term~{tok_id()=='x2'}")
     private def onX2(ctx: NCIntentMatch): NCResult = "OK"
 }
diff --git a/openapi/nlpcraft_swagger.yml b/openapi/nlpcraft_swagger.yml
index 8b239c3..957e65d 100644
--- a/openapi/nlpcraft_swagger.yml
+++ b/openapi/nlpcraft_swagger.yml
@@ -185,7 +185,7 @@ paths:
         - Tools
       summary: Runs model synonym suggestion tool.
       description: >-
-        Runs model synonym suggestion tool that is based on BERT models and uses @NCIntentSample annotation.
+        Runs model synonym suggestion tool that is based on BERT models and uses @NCIntentSample or @NCIntentSampleRef annotations.
         Administrative privileges required.
       operationId: sugsyn
       parameters: