You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/16 18:28:28 UTC
[incubator-nlpcraft] branch master updated: Abstract elements
support added.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new a7ea33e Abstract elements support added.
a7ea33e is described below
commit a7ea33e1e5ca21ed944a5e7b8677ad9786e5cc19
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Feb 16 21:25:59 2021 +0300
Abstract elements support added.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 204 +++++++++++++-------
.../examples/misc/geo/keycdn/GeoManager.java | 6 +-
.../scala/org/apache/nlpcraft/model/NCElement.java | 2 +-
.../apache/nlpcraft/model/NCModelFileAdapter.java | 7 +
.../org/apache/nlpcraft/model/NCModelView.java | 26 ++-
.../scala/org/apache/nlpcraft/model/NCToken.java | 20 +-
.../apache/nlpcraft/model/impl/NCTokenImpl.scala | 19 +-
.../nlpcraft/model/impl/json/NCModelJson.java | 7 +
.../nlpcraft/probe/mgrs/NCProbeVariants.scala | 206 +++++++++++++++++++--
.../probe/mgrs/deploy/NCDeployManager.scala | 17 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 4 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 147 +++++++--------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 4 +-
.../scala/org/apache/nlpcraft/NCTestContext.scala | 28 ++-
.../nlpcraft/examples/time/NCTimeModelSpec.scala | 24 +--
.../apache/nlpcraft/model/NCIntentDslSpec.scala | 14 +-
.../apache/nlpcraft/model/NCIntentDslSpec2.scala | 19 +-
.../abstract/NCAbstractTokensEnricherSpec.scala | 57 ++++++
.../abstract/NCAbstractTokensIntentsSpec.scala | 53 ++++++
.../model/abstract/NCAbstractTokensModel.scala | 57 ++++++
.../abstract/NCAbstractTokensVariantsSpec.scala | 168 +++++++++++++++++
.../nlpcraft/models/stm/NCStmTestModelSpec.scala | 23 +--
.../mgrs/nlp/enrichers/NCDefaultTestModel.scala | 14 +-
.../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala | 17 +-
.../nlp/enrichers/NCEnrichersTestContext.scala | 32 ++++
.../nlp/enrichers/sort/NCEnricherSortSpec.scala | 5 -
26 files changed, 931 insertions(+), 249 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index d1aeb60..e7aecd3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -17,17 +17,21 @@
package org.apache.nlpcraft.common.nlp
+import com.typesafe.scalalogging.LazyLogging
import org.apache.nlpcraft.common.NCE
import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
+import org.apache.nlpcraft.model.NCModel
import java.util
+import java.util.{List ⇒ JList}
+import java.io.{Serializable ⇒ JSerializable}
import java.util.Collections
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
import scala.collection.{Map, Seq, Set, mutable}
import scala.language.implicitConversions
-object NCNlpSentence {
+object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
/**
@@ -55,7 +59,7 @@ object NCNlpSentence {
* should not be excluded, but invalid relation should be deleted for these combinations.
*/
types.size match {
- case 0 ⇒ throw new AssertionError(s"Unexpected empty types [notesType=$notesType]")
+ case 0 ⇒ false
case 1 ⇒ types.head == notesType
case _ ⇒
// Equal elements should be processed together with function element.
@@ -64,6 +68,8 @@ object NCNlpSentence {
else {
ns.removeNote(note)
+ logger.trace(s"Removed note: $note")
+
true
}
}
@@ -90,7 +96,7 @@ object NCNlpSentence {
ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
tok.getNoteOpt(noteType, idxsField) match {
case Some(n) ⇒
- val idxs: Seq[Int] = n.data[java.util.List[Int]](idxsField).asScala
+ val idxs: Seq[Int] = n.data[JList[Int]](idxsField).asScala
var fixed = idxs
history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }
@@ -98,13 +104,13 @@ object NCNlpSentence {
fixed = fixed.distinct
if (idxs != fixed)
- ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable])
+ ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[JSerializable])
case None ⇒ // No-op.
}
)
ns.flatMap(_.getNotes(noteType)).forall(
- n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String](noteField), n)
+ n ⇒ checkRelation(ns, n.data[JList[Int]]("indexes").asScala, n.data[String](noteField), n)
)
}
@@ -117,7 +123,7 @@ object NCNlpSentence {
*/
private def fixNoteIndexes(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit =
ns.flatMap(_.getNotes(note)).foreach(
- n ⇒ checkRelation(ns, n.data[java.util.List[Int]](idxsField).asScala, n.data[String](noteField), n)
+ n ⇒ checkRelation(ns, n.data[JList[Int]](idxsField).asScala, n.data[String](noteField), n)
)
/**
@@ -127,11 +133,11 @@ object NCNlpSentence {
* @param noteField
* @param ns
*/
- private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit =
+ private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = {
ns.flatMap(_.getNotes(note)).foreach(rel ⇒
- rel.dataOpt[java.util.List[java.util.List[Int]]](idxsField) match {
+ rel.dataOpt[JList[JList[Int]]](idxsField) match {
case Some(idxsList) ⇒
- val notesTypes = rel.data[util.List[String]](noteField)
+ val notesTypes = rel.data[JList[String]](noteField)
require(idxsList.size() == notesTypes.size())
@@ -141,7 +147,7 @@ object NCNlpSentence {
case None ⇒ // No-op.
}
)
-
+ }
/**
* Copies token.
@@ -179,7 +185,7 @@ object NCNlpSentence {
// Of compilation errors which seems as scala compiler internal error.
val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]()
- def last[T](l: util.List[T]): T = l.get(l.size() - 1)
+ def last[T](l: JList[T]): T = l.get(l.size() - 1)
ns.filter(t ⇒ t.isStopWord && !t.isBracketed).foreach(t ⇒
if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index)
@@ -367,7 +373,6 @@ object NCNlpSentence {
t
}
-
/**
* Fixes notes with references list to other notes indexes.
*
@@ -391,7 +396,7 @@ object NCNlpSentence {
tok.getNoteOpt(noteType, idxsField) match {
case Some(n) ⇒
val idxs: Seq[Seq[Int]] =
- n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
+ n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala)
var fixed = idxs
history.foreach {
@@ -401,16 +406,16 @@ object NCNlpSentence {
if (fixed.forall(_.size == 1))
// Fix double dimension array to one dimension,
// so it should be called always in spite of 'fixIndexesReferences' method.
- ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable])
+ ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[JSerializable])
else
ok = false
case None ⇒ // No-op.
}
ok &&
ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
- rel.dataOpt[java.util.List[Int]](idxsField) match {
+ rel.dataOpt[JList[Int]](idxsField) match {
case Some(idxsList) ⇒
- val notesTypes = rel.data[util.List[String]](noteField)
+ val notesTypes = rel.data[JList[String]](noteField)
require(idxsList.size() == notesTypes.size())
@@ -489,13 +494,15 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
* @param text Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
* @param tokens Initial buffer.
+ * @param deletedNotes Deleted overridden notes with their tokens.
*/
class NCNlpSentence(
val srvReqId: String,
val text: String,
val enabledBuiltInToks: Set[String],
- override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
-) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
+ override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
+ val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty
+) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
@transient
private var hash: java.lang.Integer = _
@@ -504,7 +511,13 @@ class NCNlpSentence(
// Deep copy.
override def clone(): NCNlpSentence =
- new NCNlpSentence(srvReqId, text, enabledBuiltInToks, tokens.map(_.clone()))
+ new NCNlpSentence(
+ srvReqId,
+ text,
+ enabledBuiltInToks,
+ tokens.map(_.clone()),
+ deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone()))
+ )
/**
* Utility method that gets set of notes for given note type collected from
@@ -531,7 +544,7 @@ class NCNlpSentence(
hash
}
- def fixNote(note: NCNlpSentenceNote, kvs: (String, java.io.Serializable)*): Unit = {
+ def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
val fixed = note.clone(kvs: _*)
this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
@@ -542,6 +555,62 @@ class NCNlpSentence(
hash = null
}
+ private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
+ if (!mdl.getAbstractTokens.isEmpty) {
+ val notes = ns.flatten
+
+ case class Key(id: String, start: Int, end: Int) {
+ private def in(i: Int): Boolean = i >= start && i <= end
+ def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
+ }
+
+ val keys: Seq[Key] =
+ notes.filter(_.isUser).flatMap(n ⇒ {
+ val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
+
+ optList
+ }).flatMap(_.asScala).map(map ⇒ Key(
+ map.get("id").asInstanceOf[String],
+ map.get("startcharindex").asInstanceOf[Int],
+ map.get("endcharindex").asInstanceOf[Int])
+ ).distinct
+
+ case class NoteLink(note: String, indexes: Seq[Int])
+
+ val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
+
+ for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
+ noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala)
+
+ for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
+ def add(noteName: String, idxsName: String): Unit = {
+ val names = n(noteName).asInstanceOf[JList[String]]
+ val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
+
+ require(names.size() == idxsSeq.size())
+
+ noteLinks ++=
+ (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
+ yield NoteLink(name, idxs)
+ )
+ }
+
+ if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
+ if (n.contains("bynotes")) add("bynotes", "byindexes")
+ }
+
+ notes.filter(n ⇒ {
+ val noteToks = ns.tokens.filter(_.contains(n))
+
+ mdl.getAbstractTokens.contains(n.noteType) &&
+ !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
+ !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes))
+ }).foreach(ns.removeNote)
+ }
+
+ private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
+ toks.flatten.filter(!_.isNlp).distinct
+
/**
* This collapser handles several tasks:
* - "overall" collapsing after all other individual collapsers had their turn.
@@ -549,42 +618,45 @@ class NCNlpSentence(
*
* In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
* lengths - the winning note is chosen based on this priority.
- *
*/
@throws[NCE]
- def collapse(): Seq[NCNlpSentence] = {
+ def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
+ def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
+ if (lastPhase)
+ dropAbstract(mdl, ns)
+
+ if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns) else None
+ }
+
// Always deletes `similar` notes.
// Some words with same note type can be detected various ways.
// We keep only one variant - with `best` direct and sparsity parameters,
// other variants for these words are redundant.
val redundant: Seq[NCNlpSentenceNote] =
- this.flatten.filter(!_.isNlp).distinct.
- groupBy(_.getKey()).
- map(p ⇒ p._2.sortBy(p ⇒
- (
- // System notes don't have such flags.
- if (p.isUser) {
- if (p.isDirect)
- 0
+ this.flatten.filter(!_.isNlp).distinct.
+ groupBy(_.getKey()).
+ map(p ⇒ p._2.sortBy(p ⇒
+ (
+ // System notes don't have such flags.
+ if (p.isUser) {
+ if (p.isDirect)
+ 0
+ else
+ 1
+ }
else
- 1
- }
- else
- 0,
- if (p.isUser)
- p.sparsity
- else
- 0
- )
- )).
- flatMap(_.drop(1)).
- toSeq
+ 0,
+ if (p.isUser)
+ p.sparsity
+ else
+ 0
+ )
+ )).
+ flatMap(_.drop(1)).
+ toSeq
redundant.foreach(this.removeNote)
- def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
- toks.flatten.filter(!_.isNlp).distinct
-
val delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
@@ -598,7 +670,7 @@ class NCNlpSentence(
val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
- val sens =
+ var sens =
if (delCombs.nonEmpty) {
val deleted = mutable.ArrayBuffer.empty[Seq[NCNlpSentenceNote]]
@@ -614,6 +686,21 @@ class NCNlpSentence(
if (!deleted.exists(_.forall(delComb.contains))) {
val nsClone = this.clone()
+ // Saves deleted notes for sentence and their tokens.
+ nsClone.deletedNotes ++= delComb.map(n ⇒ {
+ val savedDelNote = n.clone()
+ val savedDelToks = n.tokenIndexes.map(idx ⇒ nsClone(idx).clone())
+
+ val mainNotes =
+ savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
+
+ // Deleted note's tokens should contain only nlp data and deleted notes.
+ for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+ savedDelTok.remove(mainNote)
+
+ savedDelNote → savedDelToks
+ })
+
delComb.foreach(nsClone.removeNote)
// Has overlapped notes for some tokens.
@@ -621,9 +708,7 @@ class NCNlpSentence(
deleted += delComb
- val notNlpTypes = getNotNlpNotes(nsClone).map(_.noteType).distinct
-
- if (collapseSentence(nsClone, notNlpTypes)) Some(nsClone) else None
+ collapse0(nsClone)
}
else
None
@@ -631,10 +716,7 @@ class NCNlpSentence(
// It removes sentences which have only one difference - 'direct' flag of their user tokens.
// `Direct` sentences have higher priority.
- case class Key(
- sysNotes: Seq[Map[String, java.io.Serializable]],
- userNotes: Seq[Map[String, java.io.Serializable]]
- )
+ case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
case class Value(sentence: NCNlpSentence, directCount: Int)
val m = mutable.HashMap.empty[Key, Value]
@@ -646,7 +728,7 @@ class NCNlpSentence(
val nlpNotes = notes.filter(_.isNlp)
val userNotes = notes.filter(_.isUser)
- def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, java.io.Serializable]] =
+ def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
seq.map(p ⇒
// We have to delete some keys to have possibility to compare sentences.
p.clone().filter(_._1 != "direct")
@@ -666,12 +748,10 @@ class NCNlpSentence(
m.values.map(_.sentence).toSeq
}
- else {
- if (collapseSentence(this, getNotNlpNotes(this).map(_.noteType).distinct))
- Seq(this)
- else
- Seq.empty
- }.distinct
+ else
+ collapse0(this).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
+
+ sens = sens.distinct
sens.foreach(sen ⇒
sen.foreach(tok ⇒
@@ -720,11 +800,11 @@ class NCNlpSentence(
tokensEqualOrSimilar0(set1, set2) || tokensEqualOrSimilar0(set2, set1)
def getList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
- n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[Int]].asScala.
+ n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[Int]].asScala.
map(this (_)).toSet
def getListList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
- n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[java.util.List[Int]]].asScala.
+ n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[JList[Int]]].asScala.
flatMap(_.asScala.map(this (_))).toSet
def referencesEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/misc/geo/keycdn/GeoManager.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/misc/geo/keycdn/GeoManager.java
index e254c04..7f0b7e9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/misc/geo/keycdn/GeoManager.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/misc/geo/keycdn/GeoManager.java
@@ -59,7 +59,7 @@ public class GeoManager {
* @return Geo data. Optional.
*/
public Optional<GeoDataBean> get(NCRequest sen) {
- if (!sen.getRemoteAddress().isPresent()) {
+ if (sen.getRemoteAddress().isEmpty()) {
System.err.println("Geo data can't be found because remote address is not available in the sentence.");
return Optional.empty();
@@ -90,8 +90,8 @@ public class GeoManager {
HttpURLConnection conn = (HttpURLConnection)(new URL(URL + host).openConnection());
- // This service requires "User-Agent" property for some reasons.
- conn.setRequestProperty("User-Agent", "rest");
+ // This service requires "User-Agent" property with its own format.
+ conn.setRequestProperty("User-Agent", "keycdn-tools:https://nlpcraft.apache.org");
try (InputStream in = conn.getInputStream()) {
String enc = conn.getContentEncoding();
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 3da7f5a..9ff5bf7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -149,7 +149,7 @@ public interface NCElement extends NCMetadata, Serializable {
* ]
* </pre>
*
- * @return Element's metadata or empty collection if none provided. Default implementation return empty collection. TODO:
+ * @return Element's metadata or empty collection if none provided. Default implementation returns an empty collection.
*/
default Map<String, Object> getMetadata() {
return Collections.emptyMap();
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index c607677..883dfac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -57,6 +57,7 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
private final NCModelJson proxy;
private final Set<String> suspWords;
private final Set<String> enabledToks;
+ private final Set<String> abstractToks;
private final Set<String> addStopwords;
private final Set<String> exclStopwords;
private final Set<String> intents;
@@ -112,6 +113,7 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
this.proxy = proxy;
this.suspWords = convert(proxy.getSuspiciousWords(), null);
this.enabledToks = convert(proxy.getEnabledBuiltInTokens(), NCModelView.DFLT_ENABLED_BUILTIN_TOKENS);
+ this.abstractToks = convert(proxy.getAbstractTokens(), Collections.emptySet());
this.addStopwords = convert(proxy.getAdditionalStopWords(), null);
this.exclStopwords = convert(proxy.getExcludedStopWords(), null);
this.elems = convertElements(proxy.getElements());
@@ -487,6 +489,11 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
}
@Override
+ public Set<String> getAbstractTokens() {
+ return abstractToks;
+ }
+
+ @Override
public List<NCCustomParser> getParsers() {
return parsers;
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index f82a0ce..2633557 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -691,7 +691,7 @@ public interface NCModelView extends NCMetadata {
* }
* </pre>
*
- * @return Optional user defined model metadata. TODO: cannot be null
+ * @return Optional user defined model metadata. By default, returns an empty map. Never returns {@code null}.
*/
default Map<String, Object> getMetadata() {
return DFLT_METADATA;
@@ -830,7 +830,7 @@ public interface NCModelView extends NCMetadata {
* }
* </pre>
*
- * @return Custom user parsers for model elements or {@code null} if not used (default). TODO: cannot be null!
+ * @return Custom user parsers for model elements or empty list if not used (default). Never returns {@code null}.
*/
default List<NCCustomParser> getParsers() {
return Collections.emptyList();
@@ -907,13 +907,33 @@ public interface NCModelView extends NCMetadata {
* }
* </pre>
*
- * @return Set of built-in tokens, potentially empty, that should be enabled and detected for this model.
+ * @return Set of built-in tokens, potentially empty but never {@code null}, that should be enabled
+ * and detected for this model.
*/
default Set<String> getEnabledBuiltInTokens() {
return DFLT_ENABLED_BUILTIN_TOKENS;
}
/**
+ * Gets a set of named entity (token) IDs that will be considered as abstract tokens.
+ * An abstract token is only detected when it is either a constituent part of some other non-abstract token
+ * or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
+ * unreferenced position. By default (unless returned by this method), all named entities are considered
+ * to be non-abstract.
+ * <p>
+ * Declaring tokens as abstract is important to minimize number of parsing variants automatically
+ * generated as permutation of all possible parsing compositions. For example, if it is known that a particular
+ * named entity will only be used as a constituent part of some other token - declaring such named entity as
+ * abstract can significantly reduce the number of parsing variants leading to a better performance,
+ * and often simpler corresponding intent definition and callback logic.
+ *
+ * @return Set of abstract token IDs. Can be empty but never {@code null}.
+ */
+ default Set<String> getAbstractTokens() {
+ return Collections.emptySet();
+ }
+
+ /**
* Gets maximum number of unique synonyms per model element after which either warning or error will be
* triggered. Note that there is no technical limit on how many synonyms a model element can have apart
* from memory consumption and performance considerations. However, in cases where synonyms are auto-generated
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
index a3a9eb9..c429f59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCToken.java
@@ -309,6 +309,24 @@ public interface NCToken extends NCMetadata {
String id = getId();
int i = id.indexOf(':');
- return i <=0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
+ return i <= 0 || !"nlpcraft google opennlp spacy stanford".contains(id.substring(0, i));
}
+
+ /**
+ * Whether or not this token is abstract.
+ * <p>
+ * An abstract token is only detected when it is either a constituent part of some other non-abstract token
+ * or referenced by built-in tokens. In other words, an abstract token will not be detected in a standalone
+ * unreferenced position. By default (unless declared abstract by the model), all named entities are
+ * considered to be non-abstract.
+ * <p>
+ * Declaring tokens as abstract is important to minimize number of parsing variants automatically
+ * generated as permutation of all possible parsing compositions. For example, if it is known that a particular
+ * named entity will only be used as a constituent part of some other token - declaring such named entity as
+ * abstract can significantly reduce the number of parsing variants leading to a better performance,
+ * and often simpler corresponding intent definition and callback logic.
+ *
+ * @return Whether or not this token is abstract.
+ */
+ boolean isAbstract();
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 106f673..2a3ea31 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -36,6 +36,7 @@ import scala.collection.{Seq, mutable}
* @param parentId
* @param value
* @param meta
+ * @param isAbstr
*/
private[nlpcraft] class NCTokenImpl(
mdl: NCModelView,
@@ -47,7 +48,8 @@ private[nlpcraft] class NCTokenImpl(
value: String,
startCharIndex: Int,
endCharIndex: Int,
- meta: Map[String, Object]
+ meta: Map[String, Object],
+ isAbstr: Boolean
) extends NCToken with Serializable {
require(mdl != null)
require(srvReqId != null)
@@ -60,7 +62,7 @@ private[nlpcraft] class NCTokenImpl(
Seq(srvReqId, id, startCharIndex, endCharIndex).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
private var parts = Seq.empty[NCToken]
-
+
override lazy val getModel: NCModelView = mdl
override lazy val getMetadata: java.util.Map[String, Object] = mutable.HashMap(meta.toSeq:_ *).asJava // We need mutable metadata.
override lazy val getServerRequestId: String = srvReqId
@@ -73,16 +75,17 @@ private[nlpcraft] class NCTokenImpl(
override lazy val getEndCharIndex: Int = endCharIndex
override lazy val getAliases: java.util.List[String] = meta(TOK_META_ALIASES_KEY, Collections.emptyList())
override def getPartTokens: java.util.List[NCToken] = parts.asJava
-
+ override def isAbstract: Boolean = isAbstr
+
def setParts(parts: Seq[NCToken]): Unit = this.parts = parts
-
+
override def equals(other: Any): Boolean = other match {
case t: NCTokenImpl ⇒
getServerRequestId == t.getServerRequestId &&
getId == t.getId &&
getStartCharIndex == t.getStartCharIndex &&
getEndCharIndex == t.getEndCharIndex
-
+
case _ ⇒ false
}
@@ -151,7 +154,8 @@ private[nlpcraft] object NCTokenImpl {
value = usrNote.dataOpt("value").orNull,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta()
+ meta = convertMeta(),
+ isAbstr = mdl.model.getAbstractTokens.contains(elm.getId)
)
case None ⇒
@@ -174,7 +178,8 @@ private[nlpcraft] object NCTokenImpl {
value = null,
startCharIndex = tok.startCharIndex,
endCharIndex = tok.endCharIndex,
- meta = convertMeta()
+ meta = convertMeta(),
+ isAbstr = mdl.model.getAbstractTokens.contains(note.noteType)
)
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index d1e0f90..0ded090 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -36,6 +36,7 @@ public class NCModelJson {
private String[] excludedStopWords;
private String[] suspiciousWords;
private String[] enabledBuiltInTokens;
+ private String[] abstractTokens;
private String[] intents;
private String[] parsers;
@@ -225,6 +226,12 @@ public class NCModelJson {
return enabledBuiltInTokens;
}
public void setEnabledBuiltInTokens(String[] enabledBuiltInTokens) { this.enabledBuiltInTokens = enabledBuiltInTokens; }
+ public String[] getAbstractTokens() {
+ return abstractTokens;
+ }
+ public void setAbstractTokens(String[] abstractTokens) {
+ this.abstractTokens = abstractTokens;
+ }
public String[] getIntents() {
return intents;
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index 351c8cb..16138c6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -17,14 +17,15 @@
package org.apache.nlpcraft.probe.mgrs
-import java.io.Serializable
-import java.util
-
-import org.apache.nlpcraft.common.TOK_META_ALIASES_KEY
-import org.apache.nlpcraft.common.nlp.NCNlpSentence
+import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
import org.apache.nlpcraft.model.NCVariant
import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCVariantImpl}
+import java.io.{Serializable ⇒ JSerializable}
+import java.util
+import java.util.Collections.singletonList
import scala.collection.JavaConverters._
import scala.collection.{Seq, mutable}
@@ -32,29 +33,90 @@ import scala.collection.{Seq, mutable}
* Sentence to variants converter.
*/
object NCProbeVariants {
+ private final val IDXS_SER: JSerializable = singletonList(-1).asInstanceOf[JSerializable]
+ private final val IDXS2_SER: JSerializable = singletonList(singletonList(-1)).asInstanceOf[JSerializable]
+ private final val IDXS_OBJ: Object = IDXS_SER.asInstanceOf[Object]
+ private final val IDXS2_OBJ: Object = IDXS2_SER.asInstanceOf[Object]
+ private final val IDX_OBJ = (-1).asInstanceOf[Object]
+
+ private def mkNlpNote(srcToks: Seq[NCNlpSentenceToken]): NCNlpSentenceNote = {
+ // Note, it adds stop-words too.
+ def mkValue(get: NCNlpSentenceToken ⇒ String): String = {
+ val buf = mutable.Buffer.empty[String]
+
+ val n = srcToks.size - 1
+
+ srcToks.zipWithIndex.foreach(p ⇒ {
+ val t = p._1
+ val idx = p._2
+
+ buf += get(t)
+
+ if (idx < n && t.endCharIndex != srcToks(idx + 1).startCharIndex)
+ buf += " "
+ })
+
+ buf.mkString
+ }
+
+ def all(is: NCNlpSentenceToken ⇒ Boolean): Boolean = srcToks.forall(is)
+ def exists(is: NCNlpSentenceToken ⇒ Boolean): Boolean = srcToks.exists(is)
+
+ val origText = mkValue((t: NCNlpSentenceToken) ⇒ t.origText)
+
+ val params = Seq(
+ "index" → -1,
+ "pos" → NCPennTreebank.SYNTH_POS,
+ "posDesc" → NCPennTreebank.SYNTH_POS_DESC,
+ "lemma" → mkValue(_.lemma),
+ "origText" → origText,
+ "normText" → mkValue(_.normText),
+ "stem" → mkValue(_.stem),
+ "start" → srcToks.head.startCharIndex,
+ "end" → srcToks.last.endCharIndex,
+ "charLength" → origText.length,
+ "quoted" → false,
+ "stopWord" → exists(_.isStopWord),
+ "bracketed" → false,
+ "direct" → all(_.isDirect),
+ "dict" → all(_.isKnownWord),
+ "english" → all(_.isEnglish),
+ "swear" → exists(_.isSwearWord)
+ )
+
+ NCNlpSentenceNote(Seq(-1), srcToks.flatMap(_.wordIndexes).distinct.sorted, "nlpcraft:nlp", params: _*)
+ }
+
/**
* Makes variants for given sentences for given model.
*
* @param mdl Probe model.
* @param srvReqId Server request ID.
* @param sens Sentences.
+ * @param lastPhase Flag.
*/
- def convert(srvReqId: String, mdl: NCProbeModel, sens: Seq[NCNlpSentence]): Seq[NCVariant] = {
+ def convert(srvReqId: String, mdl: NCProbeModel, sens: Seq[NCNlpSentence], lastPhase: Boolean = false): Seq[NCVariant] = {
val seq = sens.map(_.toSeq.map(nlpTok ⇒ NCTokenImpl(mdl, srvReqId, nlpTok) → nlpTok))
val toks = seq.map(_.map { case (tok, _) ⇒ tok })
case class Key(id: String, from: Int, to: Int)
-
+
val keys2Toks = toks.flatten.map(t ⇒ Key(t.getId, t.getStartCharIndex, t.getEndCharIndex) → t).toMap
val partsKeys = mutable.HashSet.empty[Key]
+ val nlpTok2nlpSen: Map[NCNlpSentenceToken, Seq[NCNlpSentence]] =
+ sens.
+ flatMap(sen ⇒ sen.map(_ → sen)).
+ groupBy { case (tok, _) ⇒ tok }.
+ map { case (tok, seq) ⇒ tok → seq.map { case (_, sen) ⇒ sen } }
+
seq.flatten.foreach { case (tok, tokNlp) ⇒
if (tokNlp.isUser) {
val userNotes = tokNlp.filter(_.isUser)
require(userNotes.size == 1)
- val optList: Option[util.List[util.HashMap[String, Serializable]]] = userNotes.head.dataOpt("parts")
+ val optList: Option[util.List[util.HashMap[String, JSerializable]]] = userNotes.head.dataOpt("parts")
optList match {
case Some(list) ⇒
@@ -67,7 +129,93 @@ object NCProbeVariants {
)
)
- val parts = keys.map(keys2Toks)
+ val parts = keys.map(key ⇒ {
+ keys2Toks.get(key) match {
+ // Notes for sentence.
+ case Some(t) ⇒
+ val meta = mutable.HashMap.empty[String, Object]
+
+ meta += "nlpcraft:nlp:index" → IDX_OBJ
+
+ meta += s"${t.getId}:tokenindexes" → IDXS_OBJ
+ meta += s"${t.getId}:wordindexes" → IDXS_OBJ
+
+ t.getId match {
+ case "nlpcraft:relation" ⇒
+ meta += "nlpcraft:relation:indexes" → IDXS_OBJ
+ case "nlpcraft:limit" ⇒
+ meta += "nlpcraft:limit:indexes" → IDXS_OBJ
+ case "nlpcraft:sort" ⇒
+ meta += "nlpcraft:sort:subjindexes" → IDXS2_OBJ
+ meta += "nlpcraft:sort:byindexes" → IDXS2_OBJ
+ case _ ⇒ // No-op.
+ }
+
+ t.getMetadata.putAll(meta.asJava)
+
+ t
+ case None ⇒
+ // Tries to find between deleted notes.
+ val delNotes = nlpTok2nlpSen(tokNlp).flatMap(_.deletedNotes).distinct
+
+ def find(noteTypePred: String ⇒ Boolean): Option[NCNlpSentenceToken] =
+ delNotes.toStream.
+ flatMap { case (delNote, delNoteToks) ⇒
+ if (noteTypePred(delNote.noteType)) {
+ val toks =
+ delNoteToks.
+ dropWhile(_.startCharIndex != key.from).
+ reverse.
+ dropWhile(_.endCharIndex != key.to).
+ reverse
+
+ toks.size match {
+ case 0 ⇒ None
+ case _ ⇒
+ val artTok = NCNlpSentenceToken(-1)
+
+ artTok.add(mkNlpNote(toks))
+
+ if (key.id != "nlpcraft:nlp") {
+ val ps =
+ mutable.ArrayBuffer.empty[(String, JSerializable)]
+
+ ps += "tokenIndexes" → IDXS_SER
+ ps += "wordIndexes" → IDXS_SER
+
+ delNote.noteType match {
+ case "nlpcraft:relation" ⇒
+ ps += "indexes" → IDXS_SER
+ case "nlpcraft:limit" ⇒
+ ps += "indexes" → IDXS_SER
+ case "nlpcraft:sort" ⇒
+ ps += "subjindexes" → IDXS2_SER
+ ps += "byindexes" → IDXS2_SER
+ case _ ⇒ // No-op.
+ }
+
+ artTok.add(delNote.clone(ps :_*))
+ }
+
+ Some(artTok)
+ }
+ }
+ else
+ None
+ }.headOption
+
+ // Tries to find with same key.
+ var nlpTokOpt = find(_ == key.id)
+
+ // If couldn't find nlp note, we can try to find any note on the same position.
+ if (nlpTokOpt.isEmpty && key.id == "nlpcraft:nlp")
+ nlpTokOpt = find(_ ⇒ true)
+
+ val nlpTok = nlpTokOpt.getOrElse(throw new NCE(s"Part not found for: $key"))
+
+ NCTokenImpl(mdl, srvReqId, nlpTok)
+ }
+ })
parts.zip(list.asScala).foreach { case (part, map) ⇒
map.get(TOK_META_ALIASES_KEY) match {
@@ -84,13 +232,47 @@ object NCProbeVariants {
}
}
}
-
+
// We can't collapse parts earlier, because we need them here (setParts method, few lines above.)
- toks.filter(sen ⇒
+ var vars = toks.filter(sen ⇒
!sen.exists(t ⇒
t.getId != "nlpcraft:nlp" &&
- partsKeys.contains(Key(t.getId, t.getStartCharIndex, t.getEndCharIndex))
+ partsKeys.contains(Key(t.getId, t.getStartCharIndex, t.getEndCharIndex))
)
).map(p ⇒ new NCVariantImpl(p.asJava))
+
+ if (lastPhase && vars.size > 1) {
+ // Drops variants which consist of 'nlpcraft:nlp' tokens only. Note that all variants can be dropped here.
+ vars = vars.filter(v ⇒ !v.asScala.forall(_.getId == "nlpcraft:nlp"))
+
+ // Sorts by count of non 'nlpcraft:nlp' tokens, desc.
+ val sortedVars = vars.sortBy(p ⇒ -p.asScala.count(_.getId != "nlpcraft:nlp"))
+ // `take(1)` and `drop(1)` are used instead of `head` and `tail` because `sortedVars` can be empty.
+ val bestVars = mutable.ArrayBuffer(sortedVars.take(1): _*)
+
+ for (
+ vrnt ← sortedVars.drop(1)
+ // Skips the candidate if some already saved variant has the same structure and
+ // the only difference is that some of the candidate's tokens are 'nlpcraft:nlp' tokens.
+ if !bestVars.exists(savedVrnt ⇒
+ savedVrnt.size == vrnt.size &&
+ savedVrnt.asScala.zip(vrnt.asScala).forall { case (savedTok, tok) ⇒
+ savedTok.getStartCharIndex == tok.getStartCharIndex &&
+ savedTok.getEndCharIndex == tok.getEndCharIndex &&
+ (
+ savedTok.getId == tok.getId && savedTok.getMetadata == tok.getMetadata ||
+ tok.getId == "nlpcraft:nlp"
+ )
+ }
+ )
+ )
+ bestVars += vrnt
+
+ if (bestVars.size != vars.size)
+ // Restores the order which variants had before sorting.
+ vars = bestVars.sortBy(sortedVars.indexOf)
+ }
+
+ vars
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 1119c27..0c51051 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -881,13 +881,14 @@ object NCDeployManager extends NCService with DecorateAsScala {
checkCollection("additionalStopWords", mdl.getAdditionalStopWords)
checkCollection("elements", mdl.getElements)
checkCollection("enabledBuiltInTokens", mdl.getEnabledBuiltInTokens)
+ checkCollection("abstractTokens", mdl.getAbstractTokens)
checkCollection("excludedStopWords", mdl.getExcludedStopWords)
checkCollection("parsers", mdl.getParsers)
checkCollection("suspiciousWords", mdl.getSuspiciousWords)
checkCollection("macros", mdl.getMacros)
checkCollection("metadata", mdl.getMetadata)
- val unsToks =
+ val unsToksBlt =
mdl.getEnabledBuiltInTokens.asScala.filter(t ⇒
// 'stanford', 'google', 'opennlp', 'spacy' - any names, not validated.
t == null ||
@@ -896,11 +897,21 @@ object NCDeployManager extends NCService with DecorateAsScala {
(t.startsWith("nlpcraft:") && !NCModelView.DFLT_ENABLED_BUILTIN_TOKENS.contains(t))
)
- if (unsToks.nonEmpty)
+ if (unsToksBlt.nonEmpty)
throw new NCE(s"Invalid token IDs for 'enabledBuiltInTokens' model property [" +
s"mdlId=${mdl.getId}, " +
- s"ids=${unsToks.mkString(", ")}" +
+ s"ids=${unsToksBlt.mkString(", ")}" +
s"]")
+
+ // Only null and 'nlpcraft:nlp' are rejected. We can't check other names because they can be created by custom parsers.
+ val unsToksAbstract = mdl.getAbstractTokens.asScala.filter(t ⇒ t == null || t == "nlpcraft:nlp")
+
+ if (unsToksAbstract.nonEmpty)
+ throw new NCE(s"Invalid token IDs for 'abstractTokens' model property [" +
+ s"mdlId=${mdl.getId}, " +
+ s"ids=${unsToksAbstract.mkString(", ")}" +
+ s"]"
+ )
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index e304822..ed1b544 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -484,7 +484,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
s"]")
}
- nlpSen.clone().collapse().
+ nlpSen.clone().collapse(mdl.model, lastPhase = true).
// Sorted to support deterministic logs.
sortBy(p ⇒
p.map(p ⇒ {
@@ -525,7 +525,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
val meta = mutable.HashMap.empty[String, Any] ++ senMeta
val req = NCRequestImpl(meta, srvReqId)
- var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq)
+ var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq, lastPhase = true)
// Sentence variants can be filtered by model.
val fltSenVars: Seq[(NCVariant, Int)] =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index fcfafd9..b270e6a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -74,11 +74,7 @@ object NCLimitEnricher extends NCProbeEnricher {
* @param isFuzzyNum Fuzzy value flag.
*/
case class Group(tokens: Seq[NCNlpSentenceToken], number: Option[Int], isFuzzyNum: Boolean) {
- lazy val value: String = number match {
- case Some(_) ⇒ CD
- case None ⇒ tokens.map(_.stem).mkString(" ")
- }
-
+ lazy val value: String = if (number.isDefined) CD else tokens.map(_.stem).mkString(" ")
lazy val index: Int = tokens.head.index
}
@@ -145,10 +141,10 @@ object NCLimitEnricher extends NCProbeEnricher {
private def isUserNotValue(n: NCNlpSentenceNote): Boolean = n.isUser && !n.contains("value")
/**
- *
- * @param parent Optional parent span.
- * @return
- */
+ *
+ * @param parent Optional parent span.
+ * @return
+ */
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
ackStarting()
@@ -211,9 +207,9 @@ object NCLimitEnricher extends NCProbeEnricher {
}
/**
- *
- * @param parent Optional parent span.
- */
+ *
+ * @param parent Optional parent span.
+ */
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
ackStopping()
@@ -255,25 +251,33 @@ object NCLimitEnricher extends NCProbeEnricher {
"mdlId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
val notes = mutable.HashSet.empty[NCNlpSentenceNote]
- val numsMap = NCNumericManager.find(ns).map(p ⇒ p.tokens → p).toMap
- val groupsMap = groupNums(ns, numsMap.values)
+
+ var numsMap: Map[Seq[NCNlpSentenceToken], NCNumeric] = null
+ var groupsMap: Map[Seq[NCNlpSentenceToken], GroupsHolder] = null
+ var tech: Set[NCNlpSentenceToken] = null
// Tries to grab tokens reverse way.
// Example: A, B, C ⇒ ABC, BC, AB .. (BC will be processed first)
- for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)) if validImportant(ns, toks))
- tryToMatch(numsMap, groupsMap, toks) match {
+ for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)) if validImportant(ns, toks)) {
+ if (numsMap == null) {
+ numsMap = NCNumericManager.find(ns).map(p ⇒ p.tokens → p).toMap
+ groupsMap = groupNums(ns, numsMap.values)
+ tech = (numsMap.keys.flatten ++ groupsMap.keys.flatten).toSet
+ }
+
+ tryToMatch(numsMap, groupsMap, tech, toks) match {
case Some(m) ⇒
for (refNote ← m.refNotes) {
- val params = mutable.ArrayBuffer.empty[(String, Any)]
+ val ps = mutable.ArrayBuffer.empty[(String, Any)]
- params += "limit" → m.limit
- params += "indexes" → m.refIndexes
- params += "note" → refNote
+ ps += "limit" → m.limit
+ ps += "indexes" → m.refIndexes
+ ps += "note" → refNote
if (m.asc.isDefined)
- params += "asc" → m.asc.get
+ ps += "asc" → m.asc.get
- val note = NCNlpSentenceNote(m.matched.map(_.index), TOK_ID, params: _*)
+ val note = NCNlpSentenceNote(m.matched.map(_.index), TOK_ID, ps: _*)
if (!notes.exists(n ⇒ ns.notesEqualOrSimilar(n, note))) {
notes += note
@@ -283,6 +287,7 @@ object NCLimitEnricher extends NCProbeEnricher {
}
case None ⇒ // No-op.
}
+ }
}
}
@@ -290,32 +295,22 @@ object NCLimitEnricher extends NCProbeEnricher {
*
* @param toks
*/
- private def getCommonNotes(toks: Seq[NCNlpSentenceToken]): Set[String] =
- if (toks.isEmpty)
- Set.empty
- else {
- def getCommon(sortedToks: Seq[NCNlpSentenceToken]): Set[String] = {
- require(sortedToks.nonEmpty)
-
- val h = sortedToks.head
- val l = sortedToks.last
-
- h.filter(!_.isNlp).filter(n ⇒ h.index == n.tokenFrom && l.index == n.tokenTo).map(_.noteType).toSet
+ private def getCommonNotes(toks: Seq[NCNlpSentenceToken]): Set[String] = {
+ def get(sorted: Seq[NCNlpSentenceToken]): Set[String] =
+ sorted.size match {
+ case 0 ⇒ Set.empty
+ case _ ⇒
+ val h = sorted.head
+ val l = sorted.last
+
+ h.filter(!_.isNlp).filter(n ⇒ h.index == n.tokenFrom && l.index == n.tokenTo).map(_.noteType).toSet
}
- var sortedToks = toks.sortBy(_.index)
-
- var res = getCommon(sortedToks)
-
- if (res.isEmpty) {
- sortedToks = sortedToks.filter(!_.isStopWord)
-
- if (sortedToks.nonEmpty)
- res = getCommon(sortedToks)
- }
+ val sortedToks = toks.sortBy(_.index)
+ val res = get(sortedToks)
- if (res.isEmpty) Set.empty else res
- }
+ if (res.nonEmpty) res else get(sortedToks.filter(!_.isStopWord))
+ }
/**
*
@@ -326,44 +321,42 @@ object NCLimitEnricher extends NCProbeEnricher {
private def tryToMatch(
numsMap: Map[Seq[NCNlpSentenceToken], NCNumeric],
groupsMap: Map[Seq[NCNlpSentenceToken], GroupsHolder],
+ tech: Set[NCNlpSentenceToken], // Tokens of the found numerics and groups.
toks: Seq[NCNlpSentenceToken]
): Option[Match] = {
- val i1 = toks.head.index
- val i2 = toks.last.index
-
- val refCands = toks.filter(_.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
-
- // Reference should be last.
- if (refCands.nonEmpty && refCands.last.index == toks.last.index) {
- val commonRefNotes = getCommonNotes(refCands)
-
- if (commonRefNotes.nonEmpty) {
- val matchCands = toks.diff(refCands)
- val idxs = refCands.map(_.index)
-
- def try0(group: Seq[NCNlpSentenceToken]): Option[Match] =
- groupsMap.get(group) match {
- case Some(h) ⇒
- if (limits.contains(h.value) || h.isFuzzyNum)
- Some(Match(h.limit, Some(h.asc), matchCands, commonRefNotes, idxs.asJava))
- else
- numsMap.get(group) match {
- case Some(num) ⇒ Some(Match(num.value, None, matchCands, commonRefNotes, idxs.asJava))
- case None ⇒ None
- }
- case None ⇒ None
- }
-
- try0(matchCands) match {
- case Some(m) ⇒ Some(m)
- case None ⇒ try0(matchCands.filter(!_.isStopWord))
+ def tryCandidates(refCands: Seq[NCNlpSentenceToken]): Option[Match] = {
+ lazy val cmnRefNotes = getCommonNotes(refCands)
+ lazy val matchCands = toks.diff(refCands)
+ // Looks for the group; numerics are checked only if the found group is neither a known limit nor a fuzzy number.
+ def try0(g: Seq[NCNlpSentenceToken]): Option[Match] =
+ groupsMap.get(g) match {
+ case Some(h) ⇒
+ if (limits.contains(h.value) || h.isFuzzyNum)
+ Some(Match(h.limit, Some(h.asc), matchCands, cmnRefNotes, refCands.map(_.index).asJava))
+ else
+ numsMap.get(g) match {
+ case Some(num) ⇒
+ Some(Match(num.value, None, matchCands, cmnRefNotes, refCands.map(_.index).asJava))
+ case None ⇒
+ None
+ }
+ case None ⇒ None
}
- }
+
+ // Reference should be last. Variant without stop words is tried only if the first attempt fails (`orElse` is lazy).
+ if (refCands.nonEmpty && refCands.last.index == toks.last.index && cmnRefNotes.nonEmpty)
+ try0(matchCands).orElse(try0(matchCands.filter(!_.isStopWord)))
else
None
}
- else
- None
+
+ val i1 = toks.head.index
+ val i2 = toks.last.index
+
+ def f(seq: ⇒ Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceToken] =
+ seq.filter(_.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head >= i1 && n.tokenIndexes.last <= i2))
+ // Tries all tokens first and, only if nothing matched, tokens without leading numeric or group tokens.
+ tryCandidates(f(toks)).orElse(tryCandidates(f(toks.dropWhile(tech.contains))))
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e2e2265..fe1cac5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -387,7 +387,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
found = false
if (collapsedSens == null)
- collapsedSens = NCProbeVariants.convert(ns.srvReqId, mdl, ns.clone().collapse()).map(_.asScala)
+ collapsedSens =
+ NCProbeVariants.
+ convert(ns.srvReqId, mdl, ns.clone().collapse(mdl.model)).map(_.asScala)
if (seq == null)
seq = convert(ns, collapsedSens, toks)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala
index 8bb384b..a398f35 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestContext.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft
import org.apache.nlpcraft.model.tools.embedded.NCEmbeddedProbe
import org.apache.nlpcraft.model.tools.test.{NCTestClient, NCTestClientBuilder}
import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
import org.junit.jupiter.api.TestInstance.Lifecycle
import org.junit.jupiter.api._
@@ -55,7 +56,7 @@ abstract class NCTestContext {
if (getClassAnnotation(info).isDefined)
stop0()
- private def getClassAnnotation(info: TestInfo) =
+ protected def getClassAnnotation(info: TestInfo): Option[NCTestEnvironment] =
if (info.getTestClass.isPresent) Option(info.getTestClass.get().getAnnotation(MDL_CLASS)) else None
private def getMethodAnnotation(info: TestInfo): Option[NCTestEnvironment] =
@@ -110,6 +111,31 @@ abstract class NCTestContext {
protected def afterProbeStop(): Unit = { }
+ /**
+ * Asks the test client given text and asserts that the answer is OK, has a result and has expected intent ID.
+ * @param txt Text to ask. It is also used in the assertions messages.
+ * @param intent Expected intent ID.
+ */
+ protected def checkIntent(txt: String, intent: String): Unit = {
+ val res = getClient.ask(txt)
+
+ assertTrue(res.isOk, s"Checked: $txt")
+ assertTrue(res.getResult.isPresent, s"Checked: $txt")
+ assertEquals(intent, res.getIntentId, s"Checked: $txt")
+ }
+
+ /**
+ * Asks the test client given request and asserts that the answer is OK and has expected result.
+ * @param req Request text to ask. It is also used in the assertions messages.
+ * @param expResp Expected result of the answer.
+ */
+ protected def checkResult(req: String, expResp: String): Unit = {
+ val res = getClient.ask(req)
+
+ // Messages are the same as in `checkIntent`, they allow to find the failed request.
+ assertTrue(res.isOk, s"Checked: $req")
+ assertTrue(res.getResult.isPresent, s"Checked: $req")
+ assertEquals(expResp, res.getResult.get, s"Checked: $req")
+ }
+
final protected def getClient: NCTestClient = {
if (cli == null)
throw new IllegalStateException("Client is not started.")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/time/NCTimeModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/time/NCTimeModelSpec.scala
index 26ff1b8..c1bc764 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/time/NCTimeModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/time/NCTimeModelSpec.scala
@@ -17,38 +17,28 @@
package org.apache.nlpcraft.examples.time
-import java.io.IOException
-
import org.apache.nlpcraft.common.NCException
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
-import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
+import java.io.IOException
+
@NCTestEnvironment(model = classOf[TimeModel], startClient = true)
class NCTimeModelSpec extends NCTestContext {
@Test
@throws[NCException]
@throws[IOException]
private[time] def testIntentsPriorities(): Unit = {
- val cli = getClient
-
- def check(txt: String, id: String): Unit = {
- val res = cli.ask(txt)
-
- assertTrue(res.isOk)
- assertTrue(res.getIntentId == id)
- }
-
// intent1 must be winner for `What's the local time?` question, because exact matching.
// Accumulated history (geo:city information) must be ignored.
// 1. Without conversation.
- check("Show me time of the day in London.", "intent2")
- cli.clearConversation()
- check("What's the local time?", "intent1")
+ checkIntent("Show me time of the day in London.", "intent2")
+ getClient.clearConversation()
+ checkIntent("What's the local time?", "intent1")
// 2. The same with conversation.
- check("Show me time of the day in London.", "intent2")
- check("What's the local time?", "intent1")
+ checkIntent("Show me time of the day in London.", "intent2")
+ checkIntent("What's the local time?", "intent1")
}
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec.scala
index 9946582..b662b07 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec.scala
@@ -55,21 +55,13 @@ class NCIntentDslSpecModel extends NCModelAdapter(
*/
@NCTestEnvironment(model = classOf[NCIntentDslSpecModel], startClient = true)
class NCIntentDslSpec extends NCTestContext {
- private def check(txt: String, intent: String): Unit = {
- val res = getClient.ask(txt)
-
- assertTrue(res.isOk, s"Checked: $txt")
- assertTrue(res.getResult.isPresent, s"Checked: $txt")
- assertEquals(intent, res.getIntentId, s"Checked: $txt")
- }
-
@Test
- def testBigCity(): Unit = check("Moscow", "bigCity")
+ def testBigCity(): Unit = checkIntent("Moscow", "bigCity")
@Test
- def testOtherCity(): Unit = check("San Francisco", "otherCity")
+ def testOtherCity(): Unit = checkIntent("San Francisco", "otherCity")
@Test
- def testUserPriority(): Unit = check("Paris", "userElement")
+ def testUserPriority(): Unit = checkIntent("Paris", "userElement")
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec2.scala
index 06f1610..8bb1ba6 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIntentDslSpec2.scala
@@ -18,7 +18,6 @@
package org.apache.nlpcraft.model
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
-import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
import org.junit.jupiter.api.Test
import java.util
@@ -57,21 +56,13 @@ class NCIntentDslSpecModel2 extends NCModelAdapter(
*/
@NCTestEnvironment(model = classOf[NCIntentDslSpecModel2], startClient = true)
class NCIntentDslSpec2 extends NCTestContext {
- private def check(txt: String, intent: String): Unit = {
- val res = getClient.ask(txt)
-
- assertTrue(res.isOk, s"Checked: $txt")
- assertTrue(res.getResult.isPresent, s"Checked: $txt")
- assertEquals(intent, res.getIntentId, s"Checked: $txt")
- }
-
@Test
def test(): Unit = {
- check("a", "a_11")
- check("a a", "a_23")
- check("a a a", "a_23")
- check("a a a a", "a_15")
- check("a a a a a a ", "a_plus")
+ checkIntent("a", "a_11")
+ checkIntent("a a", "a_23")
+ checkIntent("a a a", "a_23")
+ checkIntent("a a a a", "a_15")
+ checkIntent("a a a a a a ", "a_plus")
}
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensEnricherSpec.scala
new file mode 100644
index 0000000..5bcbb06
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensEnricherSpec.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.`abstract`
+
+import org.apache.nlpcraft.NCTestEnvironment
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCEnrichersTestContext, NCTestNlpToken ⇒ nlp, NCTestUserToken ⇒ usr}
+import org.junit.jupiter.api.Test
+
+class NCAbstractTokensModelEnrichers extends NCAbstractTokensModel with NCEnrichersTestContext // Abstract tokens test model with enrichers test context mixed in.
+
+@NCTestEnvironment(model = classOf[NCAbstractTokensModelEnrichers], startClient = true)
+class NCAbstractTokensEnricherSpec extends NCEnricherBaseSpec {
+ @Test
+ def test(): Unit = {
+ // First sentence is checked via 'checkAll' (there shouldn't be any other variants), other sentences via 'checkExists'.
+ runBatch(
+ _ ⇒ checkAll(
+ "word the word",
+ Seq(
+ nlp(text = "word"),
+ usr("the word", "wrapAnyWord")
+ )
+ ),
+ _ ⇒ checkExists(
+ "10 w1 10 w2",
+ nlp(text = "10"),
+ usr("w1 10 w2", "wrapNum")
+ ),
+ _ ⇒ checkExists(
+ "before limit top 6 the any",
+ usr("before limit top 6", "wrapLimit"),
+ usr("the any", "wrapAnyWord")
+ ),
+ _ ⇒ checkExists(
+ "a wrap before limit top 6 the any",
+ nlp("a", isStop = true),
+ usr("wrap before limit top 6", "wrapWrapLimit"),
+ usr("the any", "wrapAnyWord")
+ )
+ )
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensIntentsSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensIntentsSpec.scala
new file mode 100644
index 0000000..1f5635d
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensIntentsSpec.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.`abstract`
+
+import org.apache.nlpcraft.model.{NCIntent, NCIntentMatch, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+class NCAbstractTokensModelIntents extends NCAbstractTokensModel { // Adds intents for the 'wrap*' elements, each callback answers "OK".
+ @NCIntent("intent=wrapAnyWordIntent term(t)={id == 'wrapAnyWord'}")
+ private def onWrapInternal(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=wrapNumIntent term(t)={id == 'wrapNum'}")
+ private def onWrapNum(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=wrapLimitWrapAnyWord term(t1)={id == 'wrapLimit'} term(t2)={id == 'wrapAnyWord'}")
+ private def wrapLimitWrapAnyWord(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+ @NCIntent("intent=wrapWrapLimit term(t1)={id == 'wrapWrapLimit'} term(t2)={id == 'wrapAnyWord'}")
+ private def wrapWrapLimit(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+}
+
+@NCTestEnvironment(model = classOf[NCAbstractTokensModelIntents], startClient = true)
+class NCAbstractTokensIntentsSpec extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ // First 'word' - will be deleted (abstract).
+ // Second 'word' - will be swallowed (wrapAnyWord element).
+ checkIntent("word the word", "wrapAnyWordIntent")
+
+ // First numeric - will be deleted (abstract).
+ // Second numeric - will be swallowed (wrapNum element).
+ checkIntent("10 w1 10 w2", "wrapNumIntent")
+
+ checkIntent("before limit top 6 the any", "wrapLimitWrapAnyWord")
+ checkIntent("a wrap before limit top 6 the any", "wrapWrapLimit")
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
new file mode 100644
index 0000000..d3284d3
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.`abstract`
+
+import org.apache.nlpcraft.model.{NCElement, NCModelAdapter}
+
+import java.util
+import scala.collection.JavaConverters._
+
+class NCAbstractTokensModel extends NCModelAdapter( // Base model for abstract tokens tests.
+ "nlpcraft.abstract.elems.mdl.test", "Abstract Elements Test Model", "1.0"
+) {
+ private implicit val toList: String ⇒ util.List[String] = (s: String) ⇒ Seq(s).asJava // Wraps single synonym into Java list.
+
+ override def getElements: util.Set[NCElement] =
+ Set(
+ new NCElement {
+ override def getId: String = "anyWord" // Listed in `getAbstractTokens` below.
+ override def getSynonyms: util.List[String] = "//[a-zA-Z0-9]+//"
+ },
+ new NCElement {
+ override def getId: String = "wrapAnyWord" // Refers to 'anyWord' token.
+ override def getSynonyms: util.List[String] = "the ^^[internal](id == 'anyWord')^^"
+ },
+ new NCElement {
+ override def getId: String = "wrapNum" // Refers to 'nlpcraft:num' token.
+ override def getSynonyms: util.List[String] = "w1 ^^id == 'nlpcraft:num'^^ w2"
+ },
+ new NCElement {
+ override def getId: String = "wrapLimit" // Refers to 'nlpcraft:limit' token.
+ override def getSynonyms: util.List[String] = "before limit ^^[limitAlias](id == 'nlpcraft:limit')^^"
+ },
+ new NCElement {
+ override def getId: String = "wrapWrapLimit" // Refers to 'wrapLimit' token.
+ override def getSynonyms: util.List[String] = "wrap ^^[wrapLimitAlias](id == 'wrapLimit')^^"
+ }
+ ).asJava
+
+ override def getAbstractTokens: util.Set[String] = Set("nlpcraft:num", "anyWord").asJava // Abstract tokens of this model.
+ override def isPermutateSynonyms: Boolean = false
+ override def getJiggleFactor: Int = 0
+}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensVariantsSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensVariantsSpec.scala
new file mode 100644
index 0000000..349cc61
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensVariantsSpec.scala
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.`abstract`
+
+import org.apache.nlpcraft.model.{NCContext, NCResult, NCToken}
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+
+class NCAbstractTokensModelVariants extends NCAbstractTokensModel { // Checks variants of each test sentence in `onContext` callback.
+ private def checkId(t: NCToken, id: String): Unit =
+ require(t.getId == id, s"Expected ID: $id, token: $t")
+ private def checkText(t: NCToken, txt: String): Unit =
+ require(t.getOriginalText == txt, s"Expected text: $txt, token: $t")
+
+ override def onContext(ctx: NCContext): NCResult = { // Answers "OK" if all checks passed, `require` fails otherwise.
+ val variants = ctx.getVariants.asScala
+
+ def checkLimit(limitPart: NCToken): Unit = { // Checks 'nlpcraft:limit' part token: index -1, note 'wrapAnyWord', indexes [-1].
+ require(limitPart.getIndex == -1, s"Unexpected limit token: $limitPart, meta: ${limitPart.getMetadata}")
+ checkId(limitPart, "nlpcraft:limit")
+
+ val limNote = limitPart.getMetadata.get("nlpcraft:limit:note").asInstanceOf[String]
+
+ require(limNote == "wrapAnyWord", s"Unexpected limit token: $limitPart, meta: ${limitPart.getMetadata}")
+
+ val limIdxs =
+ limitPart.getMetadata.get("nlpcraft:limit:indexes").
+ asInstanceOf[util.List[Integer]].asScala
+
+ require(
+ limIdxs.size == 1 && limIdxs.head == -1,
+ s"Unexpected limit token: $limitPart, meta: ${limitPart.getMetadata}"
+ )
+ }
+
+ ctx.getRequest.getNormalizedText match {
+ case "word the word" ⇒
+ require(variants.size == 1)
+
+ val toks = variants.head.asScala
+
+ require(toks.size == 2)
+
+ checkId(toks.head, "nlpcraft:nlp")
+ checkText(toks.head, "word")
+
+ checkId(toks.last, "wrapAnyWord")
+ checkText(toks.last, "the word")
+
+ val t2Parts = toks.last.getPartTokens.asScala
+
+ require(t2Parts.size == 2)
+
+ checkId(t2Parts.head,"anyWord")
+ checkId(t2Parts.last, "anyWord")
+
+ t2Parts.foreach(t ⇒ require(t.isAbstract, s"Unexpected abstract token: $t")) // All parts must be abstract.
+
+ case "10 w1 10 w2" ⇒
+ require(variants.nonEmpty)
+
+ val vars = variants.
+ map(p ⇒ p.asScala).
+ filter(v ⇒ v.size == 2 && v.head.getId == "nlpcraft:nlp" && v.last.getId == "wrapNum")
+
+ require(vars.size == 1)
+
+ val toks = vars.head
+
+ require(toks.size == 2)
+
+ checkText(toks.head, "10")
+ checkText(toks.last,"w1 10 w2")
+
+ val t2Parts = toks.last.getPartTokens.asScala
+
+ require(t2Parts.size == 3)
+
+ checkId(t2Parts.head,"nlpcraft:nlp")
+ checkId(t2Parts(1),"nlpcraft:num")
+ checkId(t2Parts.last,"nlpcraft:nlp")
+
+ case "before limit top 6 the any" ⇒
+ require(variants.nonEmpty)
+
+ val vars = variants.
+ map(p ⇒ p.asScala).
+ filter(v ⇒ v.size == 2 && v.head.getId == "wrapLimit" && v.last.getId == "wrapAnyWord")
+
+ require(vars.size == 1)
+
+ val toks = vars.head
+
+ require(toks.size == 2)
+
+ checkText(toks.head, "before limit top 6")
+ checkText(toks.last,"the any")
+
+ val wrap = toks.head.getPartTokens.asScala
+
+ require(wrap.size == 3)
+
+ checkLimit(wrap.last)
+
+ case "a wrap before limit top 6 the any" ⇒
+ require(variants.nonEmpty)
+
+ val vars = variants.
+ map(p ⇒ p.asScala).
+ filter(v ⇒ v.size == 3 && v(1).getId == "wrapWrapLimit" && v.last.getId == "wrapAnyWord")
+
+ require(vars.size == 1)
+
+ val toks = vars.head
+
+ require(toks.size == 3)
+
+ checkText(toks.head, "a")
+ checkText(toks(1), "wrap before limit top 6")
+ checkText(toks.last,"the any")
+
+ val wrap = toks(1).getPartTokens.asScala
+
+ require(wrap.size == 2)
+
+ val part = wrap.last
+
+ require(part.getIndex == -1, s"Unexpected limit token: $part, meta: ${part.getMetadata}")
+ checkId(part,"wrapLimit")
+
+ require(part.getPartTokens.size == 3)
+
+ checkLimit(part.getPartTokens.asScala.last) // Nested part of the 'wrapLimit' part is checked as limit.
+ case _ ⇒ throw new AssertionError(s"Unexpected request: ${ctx.getRequest.getNormalizedText}")
+ }
+
+ NCResult.text("OK")
+ }
+}
+
+@NCTestEnvironment(model = classOf[NCAbstractTokensModelVariants], startClient = true)
+class NCAbstractTokensVariantsSpec extends NCTestContext { // Each sentence is expected to be answered with "OK" by the model.
+ @Test
+ def test(): Unit = {
+ checkResult("word the word", "OK")
+ checkResult("10 w1 10 w2", "OK")
+ checkResult("before limit top 6 the any", "OK")
+ checkResult("a wrap before limit top 6 the any", "OK")
+ }
+}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/NCStmTestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/NCStmTestModelSpec.scala
index 31b8690..5fd0084 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/NCStmTestModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/models/stm/NCStmTestModelSpec.scala
@@ -18,7 +18,6 @@
package org.apache.nlpcraft.models.stm
import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
-import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
import org.junit.jupiter.api.Test
/**
@@ -27,26 +26,14 @@ import org.junit.jupiter.api.Test
@NCTestEnvironment(model = classOf[NCStmTestModel], startClient = true)
class NCStmTestModelSpec extends NCTestContext {
/**
- * @param req
- * @param expResp
- */
- private def check(req: String, expResp: String): Unit = {
- val res = getClient.ask(req)
-
- assertTrue(res.isOk)
- assertTrue(res.getResult.isPresent)
- assertEquals(expResp, res.getResult.get)
- }
-
- /**
* Checks behaviour. It is based on intents and elements groups.
*/
@Test
private[stm] def test(): Unit = for (i ← 0 until 3) {
- check("sale", "sale")
- check("best", "sale_best_employee")
- check("buy", "buy")
- check("best", "buy_best_employee")
- check("sale", "sale")
+ checkResult("sale", "sale")
+ checkResult("best", "sale_best_employee")
+ checkResult("buy", "buy")
+ checkResult("best", "buy_best_employee")
+ checkResult("sale", "sale")
}
}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
index 8deb578..fe03c64 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCDefaultTestModel.scala
@@ -17,19 +17,18 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
-import java.util
-import java.util.Collections
-
-import org.apache.nlpcraft.model.{NCContext, NCElement, NCModelAdapter, NCResult, NCValue}
+import org.apache.nlpcraft.model.{NCElement, NCModelAdapter, NCResult, NCValue}
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCDefaultTestModel._
+import java.util
+import java.util.Collections
import scala.collection.JavaConverters._
import scala.language.implicitConversions
/**
* Enrichers default test model.
*/
-class NCDefaultTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0") {
+class NCDefaultTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0") with NCEnrichersTestContext {
private implicit def convert(s: String): NCResult = NCResult.text(s)
override def getElements: util.Set[NCElement] =
@@ -61,11 +60,6 @@ class NCDefaultTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0
}).asJava
}
- override final def onContext(ctx: NCContext): NCResult =
- NCResult.text(
- NCTestSentence.serialize(ctx.getVariants.asScala.map(v ⇒ NCTestSentence(v.asScala.map(NCTestToken(_)))))
- )
-
final override def getId: String = ID
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
index cfd39d0..26d536c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -17,14 +17,29 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
-import org.apache.nlpcraft.NCTestContext
+import org.apache.nlpcraft.{NCTestContext, NCTestEnvironment}
import org.junit.jupiter.api.Assertions.{assertTrue, fail}
+import org.junit.jupiter.api.{BeforeEach, TestInfo}
import org.scalatest.Assertions
/**
* Enrichers tests utility base class.
*/
abstract class NCEnricherBaseSpec extends NCTestContext {
+ /**
+  * Verifies, before each test, that the test class is annotated with a suitable model.
+  *
+  * The class-level [[NCTestEnvironment]] annotation is looked up via `getClassAnnotation`
+  * and its model class must be assignable to [[NCEnrichersTestContext]].
+  *
+  * @param info Test information passed to `getClassAnnotation`.
+  * @throws IllegalStateException If the annotation is missing or its model is not
+  *      assignable to [[NCEnrichersTestContext]].
+  */
+ @BeforeEach
+ def before(info: TestInfo): Unit = {
+     val env = getClassAnnotation(info).
+         getOrElse(
+             throw new IllegalStateException(
+                 s"Enricher tests should be annotated by model, see: ${classOf[NCTestEnvironment]}"
+             )
+         )
+
+     // The configured model class has to mix in the enrichers test context trait.
+     if (!classOf[NCEnrichersTestContext].isAssignableFrom(env.model()))
+         throw new IllegalStateException(
+             s"Enricher tests should be annotated by model mixed with: ${classOf[NCEnrichersTestContext]}"
+         )
+ }
/**
* Checks single variant.
*
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestContext.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestContext.scala
new file mode 100644
index 0000000..c6c5de9
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestContext.scala
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
+
+import org.apache.nlpcraft.model.{NCContext, NCModel, NCResult}
+
+import scala.collection.JavaConverters._
+
+/**
+ * Model mixin for enricher tests: answers every request with the serialized
+ * variants of the request context instead of running any model logic.
+ */
+trait NCEnrichersTestContext extends NCModel {
+    /**
+      * Converts each variant of the context into a test sentence (one test token per
+      * variant token) and returns all of them serialized as a text result.
+      *
+      * @param ctx Request context whose variants are serialized.
+      * @return Text result holding the serialized test sentences.
+      */
+    override final def onContext(ctx: NCContext): NCResult = {
+        val sentences =
+            for (variant ← ctx.getVariants.asScala)
+                yield NCTestSentence(variant.asScala.map(NCTestToken(_)))
+
+        NCResult.text(NCTestSentence.serialize(sentences))
+    }
+}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 3317331..cc03066 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -153,11 +153,6 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
usr(text = "A", id = "A"),
usr(text = "B", id = "B"),
srt(text = "classify", subjNotes = Seq("B"), subjIndexes = Seq(1))
- ),
- Seq(
- usr(text = "A", id = "A"),
- usr(text = "B", id = "B"),
- nlp(text = "classify")
)
),
_ ⇒ checkAll(