You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/07/09 02:01:48 UTC
[incubator-nlpcraft] branch NLPCRAFT-359 updated: WIP on
NLPCRAFT-359
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-359
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-359 by this push:
new 9703eb2 WIP on NLPCRAFT-359
9703eb2 is described below
commit 9703eb2cb0afadee643f3b3e7440aebdff3042e6
Author: Aaron Radzinzski <ar...@datalingvo.com>
AuthorDate: Thu Jul 8 19:01:35 2021 -0700
WIP on NLPCRAFT-359
---
.../scala/org/apache/nlpcraft/model/NCIntent.java | 1 +
.../org/apache/nlpcraft/model/NCIntentMatch.java | 1 +
.../org/apache/nlpcraft/model/NCIntentRef.java | 1 +
.../apache/nlpcraft/model/NCIntentSampleRef.java | 72 +++++++++++++++++++++-
.../org/apache/nlpcraft/model/NCIntentSkip.java | 1 +
.../org/apache/nlpcraft/model/NCIntentTerm.java | 1 +
.../model/tools/cmdline/NCCliCommands.scala | 8 +--
.../model/tools/test/NCTestAutoModelValidator.java | 23 +++----
.../nlpcraft/model/tools/test/package-info.java | 7 ++-
.../server/sugsyn/NCSuggestSynonymManager.scala | 2 +-
nlpcraft/src/test/resources/samples.txt | 20 ++++++
.../apache/nlpcraft/model/NCIntentSampleSpec.scala | 4 +-
openapi/nlpcraft_swagger.yml | 2 +-
13 files changed, 119 insertions(+), 24 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
index 3956e3a..5900bf3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntent.java
@@ -35,6 +35,7 @@ import static java.lang.annotation.RetentionPolicy.*;
* @see NCIntentRef
* @see NCIntentTerm
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntentSkip
* @see NCIntentMatch
* @see NCModel#onMatchedIntent(NCIntentMatch)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
index 50813ae..a8776c9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentMatch.java
@@ -33,6 +33,7 @@ import java.util.*;
* @see NCIntentTerm
* @see NCIntentSkip
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntentRef
*/
public interface NCIntentMatch extends NCMetadata, Serializable {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
index 25963a7..7b8c481 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentRef.java
@@ -33,6 +33,7 @@ import static java.lang.annotation.RetentionPolicy.RUNTIME;
* @see NCIntent
* @see NCIntentTerm
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntentSkip
* @see NCIntentMatch
* @see NCModel#onMatchedIntent(NCIntentMatch)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
index 19d236b..63cbf25 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSampleRef.java
@@ -17,6 +17,8 @@
package org.apache.nlpcraft.model;
+import org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator;
+
import java.lang.annotation.Documented;
import java.lang.annotation.Repeatable;
import java.lang.annotation.Retention;
@@ -26,16 +28,84 @@ import java.lang.annotation.Target;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
-// TODO:
+/**
+ * Annotation to define samples of the user input that should match an intent. This
+ * annotation allows loading these samples from external sources such as a local file or URL and
+ * should be used together with {@link NCIntent} or {@link NCIntentRef} annotations on the callback
+ * methods. Method can have multiple annotations of this type and each annotation can define multiple input
+ * examples. See similar {@link NCIntentSample} annotation that allows to define samples in place.
+ * <p>
+ * The corpus of intent samples serves several important roles in NLPCraft:
+ * <ul>
+ * <li>
+ * It provides code level documentation on what type of user input a given intent is supposed to match on.
+ * In many cases having {@link NCIntent} and {@link NCIntentSample} annotations on the intent callback
+ * method allows to see all the main ingredients of the language comprehension in one place.
+ * </li>
+ * <li>
+ * It provides a necessary corpus for automated unit and regression testing used by
+ * {@link NCTestAutoModelValidator} class from
+ * <a href="https://nlpcraft.apache.org/tools/test_framework.html">built-in test framework</a>.
+ * This class auto-validates that provided samples are matched on by their corresponding intents.
+ * </li>
+ * <li>
+ * This corpus is used by various statistical tools like
+ * <a href="https://nlpcraft.apache.org/tools/syn_tool.html">synonyms tool</a> and category value enrichment. Both
+ * of these tools utilize Google's BERT and Facebook fasttext models and require at least minimal corpus of
+ * samples for each intent.
+ * </li>
+ * </ul>
+ * <p>
+ * Here's an example of using this annotation:
+ * <pre class="brush: java, highlight: [2]">
+ * {@literal @}NCIntentRef("alarm")
+ * {@literal @}NCIntentSampleRef("alarm_samples.txt")
+ * NCResult onMatch(
+ * NCIntentMatch ctx,
+ * {@literal @}NCIntentTerm("nums") List<NCToken> numToks
+ * ) {
+ * ...
+ * }
+ * </pre>
+ * <p>
+ * Read full documentation in <a target=_ href="https://nlpcraft.apache.org/intent-matching.html">Intent Matching</a> section and review
+ * <a target=_ href="https://github.com/apache/incubator-nlpcraft/tree/master/nlpcraft-examples">examples</a>.
+ *
+ * @see NCIntentSample
+ * @see NCIntent
+ * @see NCIntentRef
+ * @see NCIntentTerm
+ * @see NCIntentSkip
+ * @see NCIntentMatch
+ * @see NCModel#onMatchedIntent(NCIntentMatch)
+ * @see NCTestAutoModelValidator
+ */
@Retention(value=RUNTIME)
@Target(value=METHOD)
@Repeatable(NCIntentSampleRef.NCIntentSampleList.class)
public @interface NCIntentSampleRef {
+ /**
+ * Local file path, classpath resource path or URL supported by {@link java.net.URL} class. The content of the source
+ * should be a new-line separated list of strings. Empty strings and strings starting with '#' (hash) symbol will
+ * be ignored. This annotation should be attached to the intent callback method. Note that using this annotation is equivalent
+ * to using {@link NCIntentSample} annotation and listing all of its samples in place instead of an external source.
+ *
+ * @return Local file path, classpath resource path or URL supported by {@link java.net.URL} class.
+ */
String value();
+
+ /**
+ * Grouping annotation required for when more than one {@link NCIntentSampleRef} annotation is used.
+ */
@Retention(RetentionPolicy.RUNTIME)
@Target(value=METHOD)
@Documented
@interface NCIntentSampleList {
+ /**
+ * Gets the list of all {@link NCIntentSampleRef} annotations attached to the callback.
+ *
+ * @return List of all {@link NCIntentSampleRef} annotations attached to the callback.
+ */
NCIntentSampleRef[] value();
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
index 96fa3a0..2bff30f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentSkip.java
@@ -37,6 +37,7 @@ import org.apache.nlpcraft.common.*;
* @see NCIntentTerm
* @see NCIntentRef
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntentMatch
* @see NCModel#onMatchedIntent(NCIntentMatch)
*/
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
index 7f548c0..9d6a16c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCIntentTerm.java
@@ -39,6 +39,7 @@ import static java.lang.annotation.RetentionPolicy.*;
* @see NCIntent
* @see NCIntentRef
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntentSkip
* @see NCIntentMatch
* @see NCModel#onMatchedIntent(NCIntentMatch)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
index 0d50408..0dcf558 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliCommands.scala
@@ -844,8 +844,8 @@ private [cmdline] object NCCliCommands {
synopsis = s"Runs ${y("'NCTestAutoModelValidator'")} model auto-validator.",
desc = Some(
s"Auto-validation consists " +
- s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} annotations and their corresponding " +
- s"callback methods, submitting each sample input sentences from ${y("'NCIntentSample'")} annotation and " +
+ s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} and ${y("'NCIntentSampleRef'")} annotations and their corresponding " +
+ s"callback methods, submitting each sample input sentences from these annotation and " +
s"checking that resulting intent matches the intent the sample was attached to. " +
s"See more details at https://nlpcraft.apache.org/tools/test_framework.html"
),
@@ -918,8 +918,8 @@ private [cmdline] object NCCliCommands {
desc = Some(
s"Re-runs mode auto-validator with the same parameters as the last run. Works only in REPL mode. " +
s"Auto-validation consists " +
- s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} annotations and their corresponding " +
- s"callback methods, submitting each sample input sentences from ${y("'NCIntentSample'")} annotation and " +
+ s"of starting an embedded probe, scanning all deployed models for ${y("'NCIntentSample'")} or ${y("'NCIntentSampleRef'")} annotations and their corresponding " +
+ s"callback methods, submitting each sample input sentences from these annotation and " +
s"checking that resulting intent matches the intent the sample was attached to. " +
s"See more details at https://nlpcraft.apache.org/tools/test_framework.html"
),
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
index 67b49d7..4648cac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestAutoModelValidator.java
@@ -21,15 +21,15 @@ import org.apache.nlpcraft.model.*;
import org.apache.nlpcraft.model.tools.test.impl.*;
/**
- * Data model auto-validator is based on {@link NCIntentSample} annotations. Validation consists of starting an embedded
- * probe, scanning all deployed models for {@link NCIntentSample} annotations and their corresponding callback methods,
- * submitting each sample input sentences from {@link NCIntentSample} annotation and checking that resulting intent
- * matches the intent the sample was attached to.
+ * Data model auto-validator is based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations. Validation
+ * consists of starting an embedded probe, scanning all deployed models for these annotations and their
+ * corresponding callback methods, submitting each sample input sentence from the annotation and checking that
+ * resulting intent matches the intent the sample was attached to.
* <p>
- * Note that there can be more than one {@link NCIntentSample} annotation attached to the intent callback. Each such
- * annotation will trigger conversation STM reset before its samples will be submitted. This gives an opportunity
- * to test samples both with and without conversational context as well as the same sample but with multiple different
- * conversation contexts.
+ * Note that there can be more than one {@link NCIntentSample} or {@link NCIntentSampleRef} annotation attached to
+ * the intent callback. Each such annotation will trigger conversation STM reset before its samples will be submitted.
+ * This gives an opportunity to test samples both with and without conversational context as well as the same
+ * sample but with multiple different conversation contexts.
* <p>
* This class can be used in two modes:
* <ul>
@@ -50,6 +50,7 @@ import org.apache.nlpcraft.model.tools.test.impl.*;
* for usage of model auto-validator.
*
* @see NCIntentSample
+ * @see NCIntentSampleRef
* @see NCIntent
* @see NCIntentRef
*/
@@ -58,7 +59,7 @@ public class NCTestAutoModelValidator {
public final static String PROP_MODELS = "NLPCRAFT_TEST_MODELS";
/**
- * Performs validation based on {@link NCIntentSample} annotations.
+ * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations.
* <p>
* This is an entry point for a standalone application that expects two system properties (both optional):
* <ul>
@@ -84,7 +85,7 @@ public class NCTestAutoModelValidator {
}
/**
- * Performs validation based on {@link NCIntentSample} annotations.
+ * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations.
* <p>
* This method accepts two system properties (both optional):
* <ul>
@@ -111,7 +112,7 @@ public class NCTestAutoModelValidator {
}
/**
- * Performs validation based on {@link NCIntentSample} annotations for given model.
+ * Performs validation based on {@link NCIntentSample} and {@link NCIntentSampleRef} annotations for given model.
* <p>
* This is a convenient shortcut that is equivalent to setting <code>NLPCRAFT_TEST_MODELS</code> system
* property (overriding any existing value) with given mode class name and calling {@link #isValid()} method.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
index 14a9948..4135586 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/package-info.java
@@ -58,10 +58,11 @@
* </pre>
* <p>
* You can also automatically <b>verify</b> the same model by using {@link org.apache.nlpcraft.model.tools.test.NCTestAutoModelValidator} class without any
- * additional coding utilizing {@link org.apache.nlpcraft.model.NCIntentSample} annotation on the models' callback method.
+ * additional coding utilizing {@link org.apache.nlpcraft.model.NCIntentSample} or
+ * {@link org.apache.nlpcraft.model.NCIntentSampleRef} annotation on the models' callback method.
* This automatic model validation consists of starting an embedded probe with a given model, scanning
- * for {@link org.apache.nlpcraft.model.NCIntentSample} annotations and their corresponding callback methods,
- * submitting each sample input sentences from {@link org.apache.nlpcraft.model.NCIntentSample} annotation and checking
+ * for these annotations and their corresponding callback methods,
+ * submitting each sample input sentences from the annotations and checking
* that resulting intent matches the intent the sample was attached to.
* <p>
* Add necessary classpath and run the following command:
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index 02366ba..76af8a0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -266,7 +266,7 @@ object NCSuggestSynonymManager extends NCService {
map { case (elemId, _) => elemId }
if (noExElems.nonEmpty)
- warns += s"Elements do not have *single word* synonyms in their @NCIntentSample - " +
+ warns += s"Elements do not have *single word* synonyms in their @NCIntentSample or @NCIntentSampleRef annotations - " +
s"no suggestion can be made: ${noExElems.mkString(", ")}"
val allReqsCnt = allReqs.map(_._2.size).sum
diff --git a/nlpcraft/src/test/resources/samples.txt b/nlpcraft/src/test/resources/samples.txt
new file mode 100644
index 0000000..4c302c5
--- /dev/null
+++ b/nlpcraft/src/test/resources/samples.txt
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+x1
+x2
+x3
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
index 5dfcc9c..4857f40 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentSampleSpec.scala
@@ -39,9 +39,7 @@ class NCIntentSampleSpecModel extends NCModelAdapter(
@NCIntentSample(Array("unknown", "unknown"))
private def onX1(ctx: NCIntentMatch): NCResult = "OK"
- @NCIntentSample(Array("x1", "x2", "x3"))
- @NCIntentSample(Array("x1", "x2"))
- @NCIntentSample(Array("x1"))
+ @NCIntentSampleRef("samples.txt")
@NCIntent("intent=intent2 term~{tok_id()=='x2'}")
private def onX2(ctx: NCIntentMatch): NCResult = "OK"
}
diff --git a/openapi/nlpcraft_swagger.yml b/openapi/nlpcraft_swagger.yml
index 8b239c3..957e65d 100644
--- a/openapi/nlpcraft_swagger.yml
+++ b/openapi/nlpcraft_swagger.yml
@@ -185,7 +185,7 @@ paths:
- Tools
summary: Runs model synonym suggestion tool.
description: >-
- Runs model synonym suggestion tool that is based on BERT models and uses @NCIntentSample annotation.
+ Runs model synonym suggestion tool that is based on BERT models and uses @NCIntentSample or @NCIntentSampleRef annotations.
Administrative privileges required.
operationId: sugsyn
parameters: