You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2022/02/22 09:12:27 UTC
[incubator-nlpcraft] branch master updated: Refactoring and bugfixes.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 790f064 Refactoring and bugfixes.
790f064 is described below
commit 790f064906d8ee6362b56e522ae95ea386b6eff1
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Feb 22 12:12:12 2022 +0300
Refactoring and bugfixes.
---
.../lightswitch/LightSwitchGroovyModel.groovy | 5 +-
.../examples/lightswitch/LightSwitchJavaModel.java | 5 +-
.../examples/lightswitch/LightSwitchKotlinModel.kt | 6 +-
.../lightswitch/LightSwitchScalaModel.scala | 6 +-
.../lightswitch/NCModelValidationSpec.scala | 8 +-
.../apache/nlpcraft/examples/time/TimeModel.java | 14 +-
.../nlpcraft/examples/weather/WeatherModel.java | 1 +
.../stanford/NCStanfordNLPEntityParserSpec.scala | 4 +-
nlpcraft/pom.xml | 6 -
.../nlpcraft/internal/impl/NCModelClientImpl.scala | 6 +-
.../intent/matcher/NCIntentSolverInput.scala | 33 ---
.../intent/matcher/NCIntentSolverManager.scala | 272 ++++++++++++++-------
.../intent/matcher/NCIntentSolverResult.scala | 47 ----
.../intent/matcher/NCIntentSolverVariant.scala | 61 -----
.../semantic/impl/NCSemanticEntityParserImpl.scala | 6 +-
.../semantic/impl/NCSemanticSourceReader.scala | 15 +-
.../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 8 +-
17 files changed, 232 insertions(+), 271 deletions(-)
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
index 24dca4d..71df5eb 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*
import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
+import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
/**
* This example provides very simple implementation for NLI-powered light switch.
@@ -42,7 +43,9 @@ class LightSwitchGroovyModel implements NCModel {
NCOpenNLPTokenParser tp = new NCOpenNLPTokenParser(tokMdlSrc, posMdlSrc, lemmaDicSrc)
this.cfg = new NCModelConfig("nlpcraft.lightswitch.groovy.ex", "LightSwitch Example Model", "1.0")
- this.pipeline = new NCModelPipelineBuilder(tp, new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).build()
+ this.pipeline = new NCModelPipelineBuilder(tp, new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).
+ withTokenEnricher(new NCStopWordsTokenEnricher()).
+ build()
}
/**
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
index 024c6e5..58a4e0e 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.*;
import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser;
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer;
import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser;
+import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher;
import java.util.List;
import java.util.stream.Collectors;
@@ -51,7 +52,9 @@ public class LightSwitchJavaModel implements NCModel {
NCOpenNLPTokenParser tp = new NCOpenNLPTokenParser(tokMdlSrc, posMdlSrc, lemmaDicSrc);
this.cfg = new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0");
- this.pipeline = new NCModelPipelineBuilder(tp, new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).build();
+ this.pipeline = new NCModelPipelineBuilder(tp, new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).
+ withTokenEnricher(new NCStopWordsTokenEnricher()).
+ build();
}
/**
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
index 12d0e14..88b30b4 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
@@ -21,6 +21,7 @@ package org.apache.nlpcraft.examples.lightswitch
import org.apache.nlpcraft.*
import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
+import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
import java.util.*
import java.util.stream.Collectors
@@ -41,7 +42,10 @@ class LightSwitchKotlinModel(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc:
private val tp = NCOpenNLPTokenParser(tokMdlSrc, posMdlSrc, lemmaDicSrc)
private val cfg = NCModelConfig("nlpcraft.lightswitch.kotlin.ex", "LightSwitch Example Model", "1.0")
- private val pipeline = NCModelPipelineBuilder(tp, NCSemanticEntityParser(NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).build()
+ private val pipeline =
+ NCModelPipelineBuilder(tp, NCSemanticEntityParser(NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).
+ withTokenEnricher(NCStopWordsTokenEnricher()).
+ build()
/**
* Intent and its on-match callback.
*
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
index efb1e1b..3501435 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
@@ -22,6 +22,7 @@ import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser
+import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher
/**
* This example provides very simple implementation for NLI-powered light switch.
@@ -42,7 +43,10 @@ class LightSwitchScalaModel(tokMdlSrc: String, posMdlSrc: String, lemmaDicSrc: S
val tp = new NCOpenNLPTokenParser(tokMdlSrc, posMdlSrc, lemmaDicSrc)
new NCModelPipelineBuilder(
tp,
- new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")).build()
+ new NCSemanticEntityParser(new NCEnSemanticPorterStemmer(), tp, "lightswitch_model.yaml")
+ ).
+ withTokenEnricher(new NCStopWordsTokenEnricher()).
+ build()
/**
* Intent and its on-match callback.
diff --git a/nlpcraft-examples/lightswitch/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala b/nlpcraft-examples/lightswitch/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
index bb0baf5..12e0c6c 100644
--- a/nlpcraft-examples/lightswitch/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
+++ b/nlpcraft-examples/lightswitch/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala
@@ -26,25 +26,25 @@ import scala.util.Using
* JUnit models validation.
*/
class NCModelValidationSpec:
- private val MDL_SCALA = new LightSwitchScalaModel(
+ private lazy val MDL_SCALA = new LightSwitchScalaModel(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
)
- private val MDL_JAVA = new LightSwitchJavaModel(
+ private lazy val MDL_JAVA = new LightSwitchJavaModel(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
)
- private val MDL_GROOVY = new LightSwitchGroovyModel(
+ private lazy val MDL_GROOVY = new LightSwitchGroovyModel(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
)
- private val MDL_KOTLIN = new LightSwitchKotlinModel(
+ private lazy val MDL_KOTLIN = new LightSwitchKotlinModel(
"opennlp/en-token.bin",
"opennlp/en-pos-maxent.bin",
"opennlp/en-lemmatizer.dict"
diff --git a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
index 0b80c29..6d8d29e 100644
--- a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
+++ b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
@@ -20,7 +20,6 @@ package org.apache.nlpcraft.examples.time;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.databind.*;
import com.fasterxml.jackson.dataformat.yaml.*;
-import org.apache.commons.lang3.text.WordUtils;
import org.apache.nlpcraft.*;
import org.apache.nlpcraft.examples.utils.cities.CitiesDataProvider;
import org.apache.nlpcraft.examples.utils.cities.City;
@@ -94,8 +93,8 @@ public class TimeModel implements NCModel {
private static NCResult mkResult(String city, String cntry, String tmz, double lat, double lon) {
Map<String, Object> m = new HashMap<>();
- m.put("city", WordUtils.capitalize(city));
- m.put("country", WordUtils.capitalize(cntry));
+ m.put("city", capitalize(city));
+ m.put("country", capitalize(cntry));
m.put("timezone", tmz);
m.put("lat", lat);
m.put("lon", lon);
@@ -116,6 +115,15 @@ public class TimeModel implements NCModel {
}
/**
+ * Capitalizes the first character of the given string and leaves the rest unchanged.
+ * Upper-casing uses {@code Locale.ROOT} so the result does not depend on the JVM default locale.
+ *
+ * @param s String to capitalize, may be {@code null} or empty.
+ * @return {@code s} itself when it is {@code null} or empty, otherwise {@code s} with its first character upper-cased.
+ */
+ private static String capitalize(String s) {
+ return s == null || s.isEmpty() ? s : s.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + s.substring(1);
+ }
+
+ /**
* Callback on remote time intent match.
*
* @param cityEnt Token for 'geo' term.
diff --git a/nlpcraft-examples/weather/src/main/java/org/apache/nlpcraft/examples/weather/WeatherModel.java b/nlpcraft-examples/weather/src/main/java/org/apache/nlpcraft/examples/weather/WeatherModel.java
index e73c308..1a37d7a 100644
--- a/nlpcraft-examples/weather/src/main/java/org/apache/nlpcraft/examples/weather/WeatherModel.java
+++ b/nlpcraft-examples/weather/src/main/java/org/apache/nlpcraft/examples/weather/WeatherModel.java
@@ -213,6 +213,7 @@ public class WeatherModel implements NCModel {
if (dateEntOpt.isPresent()) { // Date token overrides any indicators.
NCEntity dateEnt = dateEntOpt.get();
+ // TODO: should 'from'/'to' be derived from the date entity ('dateEnt', NNE?) instead of Instant.now()?
from = Instant.now();
to = Instant.now();
}
diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
index 099af1c..d6102fb 100644
--- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
+++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
@@ -31,7 +31,7 @@ class NCStanfordNLPEntityParserSpec:
@Test
def test(): Unit =
- val txt = "Los Angeles, 23 August, 23 and sergeykamov@apache.org"
+ val txt = "Los Angeles, 23 August, 23 and sergeykamov@apache.org, tomorrow"
val toks = EN_STANFORD_PIPELINE.getTokenParser.tokenize(txt)
NCTestUtils.printTokens(toks.asScala.toSeq)
@@ -39,4 +39,4 @@ class NCStanfordNLPEntityParserSpec:
val res = parser.parse(NCTestRequest(txt), CFG, toks)
NCTestUtils.printEntities(txt, res.asScala.toSeq)
- require(res.size() == 4)
\ No newline at end of file
+ require(res.size() == 5)
\ No newline at end of file
diff --git a/nlpcraft/pom.xml b/nlpcraft/pom.xml
index 9f4a02a..6746652 100644
--- a/nlpcraft/pom.xml
+++ b/nlpcraft/pom.xml
@@ -33,12 +33,6 @@
<dependencies>
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- <version>${commons.lang3.ver}</version>
- </dependency>
-
- <dependency>
<groupId>co.blocke</groupId>
<artifactId>scala-reflection_${scala.major.ver}</artifactId>
<version>${scala3.ref.ver}</version>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
index 3cf85b0..7c4f610 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelClientImpl.scala
@@ -110,7 +110,7 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging:
override val getVariants: util.Collection[NCVariant] = plData.variants.asJava
override val getTokens: JList[NCToken] = plData.tokens
- intentsMgr.solve(NCIntentSolverInput(ctx, mdl))
+ intentsMgr.solve(mdl, ctx)
/**
*
@@ -156,9 +156,7 @@ class NCModelClientImpl(mdl: NCModel) extends LazyLogging:
try
val r = ask(sample, null, userId)
- Option.when(
- ask(sample, null, userId).getIntentId != i.intent.id)
- (s"Unexpected intent ID: '${r.getIntentId}'")
+ Option.when(r.getIntentId != i.intent.id)(s"Unexpected intent ID: '${r.getIntentId}'")
catch case e: Throwable =>
logger.warn("Unexpected error.", e)
Option(e.getLocalizedMessage)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverInput.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverInput.scala
deleted file mode 100644
index 9a8587f..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverInput.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.internal.intent.matcher
-
-import org.apache.nlpcraft.*
-
-/**
- * Input data for intent solver.
- *
- * @param context
- * @model model
- * @param intentMatch
- */
-case class NCIntentSolverInput(
- context: NCContext,
- model: NCModel,
- var intentMatch: NCIntentMatch = null
-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
index d27dae3..d093e9b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverManager.scala
@@ -23,30 +23,81 @@ import org.apache.nlpcraft.internal.ascii.NCAsciiTable
import org.apache.nlpcraft.internal.dialogflow.NCDialogFlowManager
import org.apache.nlpcraft.internal.intent.*
-import java.util.{Collections, List as JList}
import java.util.function.Function
+import java.util.{Collections, List as JList}
import scala.annotation.targetName
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters.*
import scala.language.postfixOps
-/**
- * Intent solver that finds the best matching intent given user sentence.
- */
-class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLIntent, NCIntentMatch => NCResult]) extends LazyLogging:
+object NCIntentSolverManager:
/**
- * NOTE: not thread-safe.
- */
+ * Sentence variant & its weight.
+ *
+ * @param entities Entities of the sentence variant.
+ */
+ private case class IntentSolverVariant(entities: Seq[NCEntity]) extends Ordered[IntentSolverVariant]:
+     // Weight components (see `calcWeight`), computed lazily on first comparison or `toString`.
+     private lazy val weights = calcWeight()
+
+     /**
+      * Calculates how spread out the given tokens are: the sum of absolute index gaps between
+      * neighbouring tokens minus `length - 1`, i.e. `0` for tokens with consecutive indexes.
+      *
+      * @param toks Tokens of one entity.
+      * @return Sparsity value, `0` for an empty token sequence.
+      */
+     private def calcSparsity(toks: Seq[NCToken]): Int =
+         val idxs = toks.map(_.getIndex)
+
+         // `tail` throws on an empty sequence, so entities without tokens are handled explicitly.
+         if idxs.isEmpty then 0
+         else idxs.zip(idxs.tail).map { (prev, next) => Math.abs(next - prev) }.sum - idxs.length + 1
+
+     /**
+      * Calculates weight components sequence.
+      *
+      * @return Total tokens count, entities-to-tokens ratio (in percent) and negated total sparsity.
+      */
+     private def calcWeight(): Seq[Int] =
+         val toks: Seq[Seq[NCToken]] = entities.map(_.getTokens.asScala.toSeq)
+
+         val toksCnt = toks.map(_.size).sum
+         // NOTE(review): despite its name this is entities per token (x100), not tokens per entity - confirm.
+         val avgToksPerEntity = if toksCnt > 0 then Math.round((entities.size.toFloat / toksCnt) * 100) else 0
+         val totalSparsity = -toks.map(calcSparsity).sum // Less is better.
+
+         // Order is important.
+         Seq(toksCnt, avgToksPerEntity, totalSparsity)
+
+     /**
+      * Compares weights component by component; the first differing component decides.
+      *
+      * @param other Variant to compare with.
+      * @return Negative, zero or positive value as defined by `Ordered.compare`.
+      */
+     override def compare(other: IntentSolverVariant): Int =
+         weights.iterator.
+             zip(other.weights.iterator).
+             map { (w1, w2) => Integer.compare(w1, w2) }.
+             find(_ != 0).
+             getOrElse(0)
+
+     override def toString: String = weights.mkString("[", ", ", "]")
+
+ /**
+ * Entities matched for a single term of an intent.
+ *
+ * @param termId Optional ID of the intent term.
+ * @param entities Entities matched for that term.
+ */
+ private case class IntentTermEntities(termId: Option[String], entities: Seq[NCEntity])
+
+ /**
+ * Candidate result of intent solving: a matched intent, its callback and the variant it matched on.
+ *
+ * @param intentId ID of the matched intent.
+ * @param fn Intent callback to invoke with the intent match.
+ * @param groups Matched entities grouped per intent term.
+ * @param variant Sentence variant (entities and weight) the match was made against.
+ * @param variantIdx Zero-based index of that variant among the context's variants.
+ */
+ private case class IntentSolverResult(intentId: String, fn: NCIntentMatch => NCResult, groups: Seq[IntentTermEntities], variant: IntentSolverVariant, variantIdx: Int)
+
+ /**
+ * NOTE: not thread-safe.
+ */
private class Weight(ws: Int*) extends Ordered[Weight]:
private val buf = mutable.ArrayBuffer[Int]() ++ ws
/**
- * Adds given weight to this weight.
- *
- * @param that Weight to add.
- * @return
- */
+ * Adds given weight to this weight.
+ *
+ * @param that Weight to add.
+ * @return
+ */
@targetName("plusEqual")
def +=(that: Weight): Weight =
val tmp = mutable.ArrayBuffer[Int]()
@@ -57,48 +108,48 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
this
/**
- * Appends new weight.
- *
- * @param w New weight to append.
- * @return
- */
+ * Appends new weight.
+ *
+ * @param w New weight to append.
+ * @return
+ */
def append(w: Int): Weight =
buf.append(w)
this
/**
- * Prepends new weight.
- *
- * @param w New weight to prepend.
- * @return
- */
+ * Prepends new weight.
+ *
+ * @param w New weight to prepend.
+ * @return
+ */
def prepend(w: Int): Weight =
buf.prepend(w)
this
/**
- * Sets specific weight at a given index.
- *
- * @param idx
- * @param w
- */
+ * Sets specific weight at a given index.
+ *
+ * @param idx
+ * @param w
+ */
def setWeight(idx: Int, w: Int): Unit =
buf(idx) = w
/**
- * Gets element at given index or zero if index is out of bounds.
- *
- * @param i Index in collection.
- * @param c Collection.
- * @return
- */
+ * Gets element at given index or zero if index is out of bounds.
+ *
+ * @param i Index in collection.
+ * @param c Collection.
+ * @return
+ */
private def norm(i: Int, c: mutable.ArrayBuffer[Int]): Int = if i < c.size then c(i) else 0
/**
- *
- * @param that
- * @return
- */
+ *
+ * @param that
+ * @return
+ */
override def compare(that: Weight): Int =
def compareWeight(idx: Int): Option[Int] =
val res = Integer.compare(norm(idx, buf), norm(idx, that.buf))
@@ -111,21 +162,24 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
override def toString: String = buf.mkString("[", ", ", "]")
/**
- *
- * @param used
- * @param entity
- */
- private case class IntentEntity(
- var used: Boolean,
- var conv: Boolean,
- entity: NCEntity
- )
+ * Mutable wrapper around an entity used while matching intents.
+ *
+ * @param used Mutable "used" flag; `solveIntents` creates sentence entities with `false`.
+ * @param conv Mutable flag, also created as `false` for sentence entities (presumably marks entities taken from conversation/STM - confirm).
+ * @param entity Wrapped entity.
+ */
+ private case class IntentEntity(var used: Boolean, var conv: Boolean, entity: NCEntity)
/**
- * @param termId
- * @param usedEntities
- * @param weight
- */
+ * Outcome of a successful solver iteration.
+ *
+ * @param result Result returned by the winning intent callback.
+ * @param intentMatch Intent match that was passed to that callback.
+ */
+ private case class IterationResult(result: NCResult, intentMatch: NCIntentMatch)
+
+ /**
+ * @param termId
+ * @param usedEntities
+ * @param weight
+ */
private case class TermMatch(termId: Option[String], usedEntities: Seq[IntentEntity], weight: Weight):
private lazy val maxIndex: Int = usedEntities.map(_.entity.getTokens.asScala.map(_.getIndex).max).max
@@ -138,22 +192,22 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
private case class PredicateMatch(entities: Seq[IntentEntity], weight: Weight)
/**
- *
- * @param term
- * @param usedEntities
- */
+ *
+ * @param term
+ * @param usedEntities
+ */
private case class TermEntitiesGroup(
term: NCIDLTerm,
usedEntities: Seq[IntentEntity]
)
/**
- *
- * @param entityGroups
- * @param weight
- * @param intent
- */
- private case class IntentMatch(
+ *
+ * @param entityGroups
+ * @param weight
+ * @param intent
+ */
+ private case class IntentMatchHolder(
entityGroups: List[TermEntitiesGroup],
weight: Weight,
intent: NCIDLIntent
@@ -167,27 +221,34 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
* @param variantIdx
*/
private case class MatchHolder(
- intentMatch: IntentMatch, // Match.
+ intentMatch: IntentMatchHolder, // Match.
callback: NCIntentMatch => NCResult, // Callback function.
- variant: NCIntentSolverVariant, // Variant used for the match.
+ variant: IntentSolverVariant, // Variant used for the match.
variantIdx: Int // Variant index.
)
+import org.apache.nlpcraft.internal.intent.matcher.NCIntentSolverManager.*
+
+/**
+ * Intent solver that finds the best matching intent given user sentence.
+ */
+class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLIntent, NCIntentMatch => NCResult]) extends LazyLogging:
/**
* Main entry point for intent engine.
*
+ * @param mdl Model.
* @param ctx Query context.
* @param intents Intents to match for.
* @return
*/
- private def solveIntents(ctx: NCContext, intents: Map[NCIDLIntent, NCIntentMatch => NCResult]): List[NCIntentSolverResult] =
+ private def solveIntents(mdl: NCModel, ctx: NCContext, intents: Map[NCIDLIntent, NCIntentMatch => NCResult]): List[IntentSolverResult] =
dialog.ack(ctx.getRequest.getUserId)
val matches = mutable.ArrayBuffer.empty[MatchHolder]
// Find all matches across all intents and sentence variants.
for (
- (vrn, vrnIdx) <- ctx.getVariants.asScala.zipWithIndex;
+ (vrn, vrnIdx) <- ctx.getVariants.asScala.zipWithIndex if mdl.onVariant(vrn);
ents = vrn.getEntities.asScala;
varEntsGroups = ents.map(t => if t.getGroups != null then t.getGroups.asScala else Set.empty[String]);
(intent, callback) <- intents
@@ -204,7 +265,7 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
// Solve intent in isolation.
solveIntent(ctx, intent, ents.map(IntentEntity(false, false, _)).toSeq, convEnts, vrnIdx) match
- case Some(intentMatch) => matches += MatchHolder(intentMatch, callback, NCIntentSolverVariant(vrn.getEntities.asScala.toSeq), vrnIdx)
+ case Some(intentMatch) => matches += MatchHolder(intentMatch, callback, IntentSolverVariant(vrn.getEntities.asScala.toSeq), vrnIdx)
case None => // No-op.
val sorted = matches.sortWith((m1: MatchHolder, m2: MatchHolder) =>
@@ -244,10 +305,10 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
logMatches(sorted)
sorted.map(m =>
- NCIntentSolverResult(
+ IntentSolverResult(
m.intentMatch.intent.id,
m.callback,
- m.intentMatch.entityGroups.map(grp => NCIntentTermEntities(grp.term.id, grp.usedEntities.map(_.entity))),
+ m.intentMatch.entityGroups.map(grp => IntentTermEntities(grp.term.id, grp.usedEntities.map(_.entity))),
m.variant,
m.variantIdx
)
@@ -328,7 +389,7 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
*/
private def solveIntent(
ctx: NCContext, intent: NCIDLIntent, senEnts: Seq[IntentEntity], convEnts: Seq[IntentEntity], varIdx: Int
- ): Option[IntentMatch] =
+ ): Option[IntentMatchHolder] =
val intentId = intent.id
val opts = intent.options
val flow = dialog.getDialogFlow(ctx.getRequest.getUserId)
@@ -402,15 +463,19 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
if !opts.allowStmEntityOnly && usedSenEnts.isEmpty && usedConvEnts.nonEmpty then
logger.info(
- s"Intent '$intentId' did not match because all its matched tokens came from STM $varStr. " +
- s"See intent 'allowStmEntityOnly' option."
+ s"""
+ |Intent '$intentId' did not match because all its matched tokens came from STM $varStr.
+ |See intent 'allowStmEntityOnly' option.
+ |""".stripMargin
)
None
else if !opts.ignoreUnusedFreeWords && unusedToks.nonEmpty then
logger.info(
- s"Intent '$intentId' did not match because of unused free words $varStr. " +
- s"See intent 'ignoreUnusedFreeWords' option. " +
- s"Unused free words indexes: ${unusedToks.map(_.getIndex).mkString("{", ",", "}")}"
+ s"""
+ |Intent '$intentId' did not match because of unused free words $varStr.
+ |See intent 'ignoreUnusedFreeWords' option.
+ |Unused free words indexes: ${unusedToks.map(_.getIndex).mkString("{", ",", "}")}
+ |""".stripMargin
)
None
else
@@ -424,7 +489,7 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
intentW.prepend(nonFreeWordNum)
- Option(IntentMatch(entityGroups = intentGrps.toList, weight = intentW, intent = intent))
+ Option(IntentMatchHolder(entityGroups = intentGrps.toList, weight = intentW, intent = intent))
else
None
@@ -561,31 +626,31 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
/**
*
- * @param slvIn Intent solver input.
- * @throws NCRejection
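+ * @param mdl Model whose `onMatchedIntent` callback decides whether a matched intent is accepted.
+ * @param ctx Query context.
+ * @return Callback result with its intent match, or `None` if the model requested rematching.
+ * @throws NCRejection If there are no registered intents or no matching intent is found.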
*/
- private def solveIteration(slvIn: NCIntentSolverInput): Option[NCResult] =
+ private def solveIteration(mdl: NCModel, ctx: NCContext): Option[IterationResult] =
// Should it be an assertion?
if intents.isEmpty then throw new NCRejection("Intent solver has no registered intents.")
- val ctx = slvIn.context
val req = ctx.getRequest
val intentResults =
- try solveIntents(ctx, intents)
+ try solveIntents(mdl, ctx, intents)
catch case e: Exception => throw new NCRejection("Processing failed due to unexpected error.", e)
if intentResults.isEmpty then throw new NCRejection("No matching intent found.")
object Loop:
- private var data: Option[Option[NCResult]] = None
+ private var data: Option[Option[IterationResult]] = None
private var stopped: Boolean = false
def hasNext: Boolean = !stopped
- def finish(data: Option[NCResult]): Unit =
+ def finish(data: Option[IterationResult] = None): Unit =
Loop.data = Option(data)
Loop.stopped = true
- def result: Option[NCResult] = data.getOrElse(throw new NCRejection("No matching intent found - all intents were skipped."))
+ def result: Option[IterationResult] = data.getOrElse(throw new NCRejection("No matching intent found - all intents were skipped."))
for (intentRes <- intentResults.filter(_ != null) if Loop.hasNext)
val intentMatch: NCIntentMatch =
@@ -602,21 +667,20 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
new NCVariant:
override def getEntities: JList[NCEntity] = intentRes.variant.entities.asJava
try
- if slvIn.model.onMatchedIntent(intentMatch) then
+ if mdl.onMatchedIntent(intentMatch) then
// This can throw NCIntentSkip exception.
val cbRes = intentRes.fn(intentMatch)
// Store won intent match in the input.
- slvIn.intentMatch = intentMatch
if cbRes.getIntentId == null then
cbRes.setIntentId(intentRes.intentId)
logger.info(s"Intent '${intentRes.intentId}' for variant #${intentRes.variantIdx + 1} selected as the <|best match|>")
dialog.addMatchedIntent(intentMatch, cbRes, ctx)
- Loop.finish(Option(cbRes))
+ Loop.finish(Option(IterationResult(cbRes, intentMatch)))
else
logger.info(s"Model '${ctx.getModelConfig.getId}' triggered rematching of intents by intent '${intentRes.intentId}' on variant #${intentRes.variantIdx + 1}.")
- Loop.finish(None)
+ Loop.finish()
catch
case e: NCIntentSkip =>
// No-op - just skipping this result.
@@ -628,13 +692,31 @@ class NCIntentSolverManager(dialog: NCDialogFlowManager, intents: Map[NCIDLInten
/**
*
- * @param in
- * @throws NCRejection
+ * Solves the given context against the registered intents, notifying the model through its callbacks.
+ *
+ * @param mdl Model whose `onContext`, `onResult`, `onRejection` and `onError` callbacks are invoked.
+ * @param ctx Query context.
+ * @return Non-null result of `mdl.onContext` if there is one, otherwise the result of the winning intent callback.
*/
- def solve(in: NCIntentSolverInput): NCResult =
- var res: NCResult = null
- while (res == null)
- solveIteration(in) match
- case Some(iterRes) => res = iterRes
- case None => // No-op.
- res
\ No newline at end of file
+ def solve(mdl: NCModel, ctx: NCContext): NCResult =
+ // A non-null result from `onContext` is returned as is and intent matching is skipped.
+ val resCtx: NCResult = mdl.onContext(ctx)
+
+ if resCtx != null then
+ resCtx
+ else
+ var res: IterationResult = null
+
+ try
+ // `solveIteration` returns `None` when the model requested rematching, so keep iterating.
+ while (res == null)
+ solveIteration(mdl, ctx) match
+ case Some(iterRes) => res = iterRes
+ case None => // No-op.
+
+ // NOTE(review): anything returned by `onResult` is discarded here - confirm this is intended.
+ mdl.onResult(res.intentMatch, res.result)
+
+ res.result
+ catch
+ case e: NCRejection =>
+ // `res` is still null when the rejection was thrown by `solveIteration`.
+ mdl.onRejection(if res != null then res.intentMatch else null, e)
+ throw e
+ case e: Throwable =>
+ mdl.onError(ctx, e)
+ throw e
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverResult.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverResult.scala
deleted file mode 100644
index b36e8e0..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverResult.scala
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.internal.intent.matcher
-
-import org.apache.nlpcraft.*
-
-/**
- *
- * @param termId
- * @param entities
- */
-case class NCIntentTermEntities(
- termId: Option[String],
- entities: Seq[NCEntity]
-)
-
-/**
- * Intent solver engine result. Using basic case class for easier Java interop.
- *
- * @param intentId
- * @param fn
- * @param groups
- * @param variant
- * @param variantIdx
- */
-case class NCIntentSolverResult(
- intentId: String,
- fn: NCIntentMatch => NCResult,
- groups: Seq[NCIntentTermEntities],
- variant: NCIntentSolverVariant,
- variantIdx: Int
-)
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverVariant.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverVariant.scala
deleted file mode 100644
index de1af0c..0000000
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/intent/matcher/NCIntentSolverVariant.scala
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.internal.intent.matcher
-
-import org.apache.nlpcraft.*
-
-import java.util
-import scala.jdk.CollectionConverters.*
-
-/**
- * Sentence variant & its weight.
- *
- * @param entities
- */
-case class NCIntentSolverVariant(entities: Seq[NCEntity]) extends Ordered[NCIntentSolverVariant]:
- private lazy val weights = calcWeight()
-
- /**
- *
- * @param toks
- */
- private def calcSparsity(toks: Seq[NCToken]): Int =
- val idxs = toks.map(_.getIndex)
- idxs.zipWithIndex.tail.map { (v, i) => Math.abs(v - idxs(i - 1)) }.sum - idxs.length + 1
-
- /**
- * Calculates weight components sequence.
- */
- private def calcWeight(): Seq[Int] =
- val toks: Seq[Seq[NCToken]] = entities.map(_.getTokens.asScala.toSeq)
-
- val toksCnt = toks.map(_.size).sum
- val avgToksPerEntity = if toksCnt > 0 then Math.round((entities.size.toFloat / toksCnt) * 100) else 0
- val totalSparsity = -toks.map(calcSparsity).sum // Less is better.
-
- // Order is important.
- Seq(toksCnt, avgToksPerEntity, totalSparsity)
-
- override def compare(other: NCIntentSolverVariant): Int =
- def compareWeight(weight1: Int, weight2: Int): Option[Int] =
- val res = Integer.compare(weight1, weight2)
- Option.when(res != 0)(res)
-
- weights.zip(other.weights).flatMap { (w1, w2) => compareWeight(w1, w2) }.to(LazyList).headOption.getOrElse(0)
-
- override def toString: String = s"${weights.mkString("[", ", ", "]")}"
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 99bb1df..71ea590 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -29,7 +29,7 @@ import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticSourceType.
import java.io.*
import java.util
import java.util.regex.*
-import java.util.{List as JList, Map as Jmap}
+import java.util.{List as JList, Map as JMap, Set as JSet}
import scala.collection.mutable
import scala.jdk.CollectionConverters.*
@@ -48,7 +48,7 @@ object NCSemanticEntityParserImpl:
def apply(
stemmer: NCSemanticStemmer,
parser: NCTokenParser,
- macros: Jmap[String, String],
+ macros: JMap[String, String],
elms: JList[NCSemanticElement]
): NCSemanticEntityParserImpl =
require(elms != null)
@@ -225,7 +225,6 @@ class NCSemanticEntityParserImpl(
val e = elemsMap(h.elemId)
new NCPropertyMapAdapter with NCEntity:
if (e.getProperties != null) e.getProperties.asScala.foreach { (k, v) => put(s"${h.elemId}:$k", v) }
-
h.value match
case Some(value) => put(s"${h.elemId}:value", value)
case None => // No-op.
@@ -233,4 +232,5 @@ class NCSemanticEntityParserImpl(
override val getTokens: JList[NCToken] = h.tokens.asJava
override val getRequestId: String = req.getRequestId
override val getId: String = h.elemId
+ override val getGroups: JSet[String] = e.getGroups
}).asJava
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
index 9264163..d8981b1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala
@@ -65,9 +65,9 @@ private[impl] object NCSemanticSourceReader:
)
case class Source(macros: Map[String, String], elements: Seq[Element])
- private def nvl[T](seq: Seq[T]): JSet[T] = if seq == null then null else new util.HashSet[T](seq.asJava)
- private def nvl[T](set: Set[T]): JSet[T] = if set == null then null else set.asJava
- private def nvl[T, R](seq: Seq[T], to: T => R): Seq[R] = if seq == null then null else seq.map(to)
+ private def nvlGroups[T](seq: Seq[T]): JSet[T] = if seq == null then null else new util.HashSet[T](seq.asJava)
+ private def nvlSynonyms[T](set: Set[T]): JSet[T] = if set == null then null else set.asJava
+ private def nvlElements[T, R](seq: Seq[T], to: T => R): Seq[R] = if seq == null then Seq.empty else seq.map(to)
private def nvlValues(m: Map[String, Set[String]]): JMap[String, JSet[String]] =
if m == null then null else m.map { (k, v) => k -> v.asJava }.asJava
private def nvlProperties(m: Map[String, AnyRef]): JMap[String, Object] =
@@ -78,9 +78,12 @@ private[impl] object NCSemanticSourceReader:
else
new NCPropertyMapAdapter with NCSemanticElement:
override val getId: String = e.id
- override val getGroups: JSet[String] = nvl(e.groups)
+ override val getGroups: JSet[String] =
+ val gs = nvlGroups(e.groups)
+
+ if gs != null && !gs.isEmpty then gs else super.getGroups
override val getValues: JMap[String, JSet[String]] = nvlValues(e.values)
- override val getSynonyms: JSet[String] = nvl(e.synonyms)
+ override val getSynonyms: JSet[String] = nvlSynonyms(e.synonyms)
override val getProperties: JMap[String, AnyRef] = nvlProperties(e.properties)
/**
@@ -101,4 +104,4 @@ private[impl] object NCSemanticSourceReader:
configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true).
readValue(is, classOf[Source])
- NCSemanticSourceData(src.macros, nvl(src.elements, convertElement))
\ No newline at end of file
+ NCSemanticSourceData(src.macros, nvlElements(src.elements, convertElement))
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
index bbaf8a6..bc2c4f1 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
@@ -30,13 +30,15 @@ import java.util.{Optional, ArrayList as JList}
case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter with NCModelPipeline with Cloneable:
require(tokParser != null)
+ private var variantFilter: Optional[NCVariantFilter] = Optional.empty()
+
override val getTokenParser: NCTokenParser = tokParser
override val getTokenEnrichers = new JList[NCTokenEnricher]()
override val getEntityEnrichers = new JList[NCEntityEnricher]()
override val getEntityParsers = new JList[NCEntityParser]()
override val getTokenValidators = new JList[NCTokenValidator]()
override val getEntityValidators = new JList[NCEntityValidator]()
- override val getVariantFilter: Optional[NCVariantFilter] = Optional.empty[NCVariantFilter]()
+ override def getVariantFilter: Optional[NCVariantFilter] = variantFilter
override def clone(): NCTestPipeline =
val copy = NCTestPipeline(this.tokParser)
@@ -46,6 +48,6 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter
copy.getEntityParsers.addAll(this.getEntityParsers)
copy.getTokenValidators.addAll(this.getTokenValidators)
copy.getEntityValidators.addAll(this.getEntityValidators)
+ copy.variantFilter = this.variantFilter
- copy
-
+ copy
\ No newline at end of file