You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2020/03/23 22:45:05 UTC
[incubator-nlpcraft] branch master updated: Fix for NLPCRAFT-15.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 05b7162 Fix for NLPCRAFT-15.
05b7162 is described below
commit 05b71627501324ea7bdc14675f167662986b95ec
Author: Aaron Radzinzski <ar...@datalingvo.com>
AuthorDate: Mon Mar 23 15:44:48 2020 -0700
Fix for NLPCRAFT-15.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 29 +++++-
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 83 ++++++++--------
.../nlpcraft/common/nlp/NCNlpSentenceToken.scala | 54 +++++------
.../common/nlp/core/NCNlpCoreManager.scala | 2 -
.../nlpcraft/common/nlp/core/NCNlpTokenizer.scala | 4 +-
.../nlp/core/opennlp/NCOpenNlpTokenizer.scala | 19 +---
.../nlp/core/stanford/NCStanfordTokenizer.scala | 3 +-
.../apache/nlpcraft/model/impl/NCTokenLogger.scala | 4 +-
.../org/apache/nlpcraft/probe/NCProbeBoot.scala | 4 +-
.../probe/mgrs/deploy/NCDeployManager.scala | 2 +
.../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala | 39 +-------
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 39 +++++---
.../aggregation/NCAggregationEnricher.scala | 12 +--
.../dictionary/NCDictionaryEnricher.scala | 25 +++--
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 38 +++-----
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 19 ++--
.../nlp/enrichers/post/NCPostEnrichProcessor.scala | 107 ++++++++------------
.../enrichers/relation/NCRelationEnricher.scala | 17 ++--
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 13 +--
.../enrichers/stopword/NCStopWordEnricher.scala | 108 +++++----------------
.../suspicious/NCSuspiciousNounsEnricher.scala | 12 +--
.../nlp/core/opennlp/NCOpenNlpNerEnricher.scala | 9 +-
.../server/nlp/core/opennlp/NCOpenNlpParser.scala | 7 --
.../server/nlp/enrichers/date/NCDateEnricher.scala | 16 ++-
.../server/nlp/enrichers/geo/NCGeoEnricher.scala | 10 +-
.../nlp/enrichers/quote/NCQuoteEnricher.scala | 2 +-
.../enrichers/stopword/NCStopWordEnricher.scala | 20 ++--
.../nlpcraft/model/intent/dsl/NCDslTest.java | 2 +
.../nlpcraft/models/nested/NCNestedTestModel.scala | 4 +-
.../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala | 27 +++++-
.../mgrs/nlp/enrichers/NCEnricherTestModel.scala | 18 +++-
.../mgrs/nlp/enrichers/NCEnrichersTestBeans.scala | 24 ++++-
.../NCEnricherAggregationSpec.scala} | 18 ++--
.../nlp/enrichers/limit/NCEnricherLimitSpec.scala | 20 +++-
.../NCEnricherRelationSpec.scala} | 20 ++--
35 files changed, 371 insertions(+), 459 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index fdecb85..b88a472 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -38,8 +38,10 @@ class NCNlpSentence(
val enabledBuiltInToks: Set[String],
override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
- private lazy val hash =
- Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+ @transient
+ private var hash: java.lang.Integer = _
+
+ private def calcHash(): Int = Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
// Deep copy.
override def clone(): NCNlpSentence = new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, tokens.map(_.clone()))
@@ -57,11 +59,28 @@ class NCNlpSentence(
* Utility method that removes note with given ID from all tokens in this sentence.
* No-op if such note wasn't found.
*
- * @param id Note ID.
+ * @param note Note.
*/
- def removeNote(id: String): Unit = this.foreach(_.remove(id))
+ def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
+
+ //noinspection HashCodeUsesVar
+ override def hashCode(): Int = {
+ if (hash == null)
+ hash = calcHash()
+
+ hash
+ }
- override def hashCode(): Int = hash
+ def fixNote(note: NCNlpSentenceNote, kvs: (String, java.io.Serializable)*): Unit = {
+ val fixed = note.clone(kvs: _*)
+
+ this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
+ t.remove(note)
+ t.add(fixed)
+ })
+
+ hash = null
+ }
override def equals(obj: Any): Boolean = obj match {
case x: NCNlpSentence ⇒
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index acf007a..d707000 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -17,7 +17,6 @@
package org.apache.nlpcraft.common.nlp
-
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.ascii._
@@ -28,27 +27,22 @@ import scala.language.implicitConversions
/**
* Sentence token note is a typed map of KV pairs.
*
- * @param id Internal ID.
*/
-class NCNlpSentenceNote(
- val id: String,
- val values: mutable.HashMap[String, java.io.Serializable] = mutable.HashMap[String, java.io.Serializable]()
-) extends java.io.Serializable with NCAsciiLike {
+class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) extends java.io.Serializable with NCAsciiLike {
import NCNlpSentenceNote._
- private val hash: Int = id.hashCode()
-
- this.put("unid", this.id)
+ @transient
+ private lazy val hash = values.hashCode()
// Shortcuts for mandatory fields. (Immutable fields)
- lazy val noteType: String = this("noteType").asInstanceOf[String]
- lazy val tokenFrom: Int = this("tokMinIndex").asInstanceOf[Int] // First index.
- lazy val tokenTo: Int = this("tokMaxIndex").asInstanceOf[Int] // Last index.
- lazy val tokenIndexes: Seq[Int] = this("tokWordIndexes").asInstanceOf[java.util.List[Int]].asScala // Includes 1st and last indices too.
- lazy val wordIndexes: Seq[Int] = this("wordIndexes").asInstanceOf[java.util.List[Int]].asScala // Includes 1st and last indices too.
- lazy val sparsity: Int = this("sparsity").asInstanceOf[Int]
- lazy val isContiguous: Boolean = this("contiguous").asInstanceOf[Boolean]
- lazy val isDirect: Boolean = this("direct").asInstanceOf[Boolean]
+ lazy val noteType: String = values("noteType").asInstanceOf[String]
+ lazy val tokenFrom: Int = values("tokMinIndex").asInstanceOf[Int] // First index.
+ lazy val tokenTo: Int = values("tokMaxIndex").asInstanceOf[Int] // Last index.
+ lazy val tokenIndexes: Seq[Int] = values("tokWordIndexes").asInstanceOf[java.util.List[Int]].asScala // Includes 1st and last indices too.
+ lazy val wordIndexes: Seq[Int] = values("wordIndexes").asInstanceOf[java.util.List[Int]].asScala // Includes 1st and last indices too.
+ lazy val sparsity: Int = values("sparsity").asInstanceOf[Int]
+ lazy val isContiguous: Boolean = values("contiguous").asInstanceOf[Boolean]
+ lazy val isDirect: Boolean = values("direct").asInstanceOf[Boolean]
lazy val isUser: Boolean = {
val i = noteType.indexOf(":")
@@ -59,11 +53,11 @@ class NCNlpSentenceNote(
lazy val isNlp: Boolean = noteType == "nlpcraft:nlp"
// Typed getter.
- def data[T](key: String): T = this(key).asInstanceOf[T]
- def dataOpt[T](key: String): Option[T] = this.get(key).asInstanceOf[Option[T]]
+ def data[T](key: String): T = values(key).asInstanceOf[T]
+ def dataOpt[T](key: String): Option[T] = values.get(key).asInstanceOf[Option[T]]
override def equals(obj: Any): Boolean = obj match {
- case h: NCNlpSentenceNote ⇒ h.hash == hash && h.id == id
+ case h: NCNlpSentenceNote ⇒ h.hashCode() == hashCode() && h.values == values
case _ ⇒ false
}
@@ -74,42 +68,45 @@ class NCNlpSentenceNote(
*/
def clone(indexes: Seq[Int], wordIndexes: Seq[Int], params: (String, Any)*): NCNlpSentenceNote =
NCNlpSentenceNote(
- id,
indexes,
Some(wordIndexes),
noteType,
- this.filter(p ⇒ !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
+ values.filter(p ⇒ !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
)
- override def clone(): NCNlpSentenceNote = new NCNlpSentenceNote(id, values.clone())
+ override def clone(): NCNlpSentenceNote = {
+ val m = mutable.Map.empty[String, java.io.Serializable] ++ values
+
+ new NCNlpSentenceNote(m.toMap)
+ }
/**
*
* @return
*/
override def toAscii: String =
- this.iterator.toSeq.sortBy(_._1).foldLeft(NCAsciiTable("Key", "Value"))((t, p) ⇒ t += p).toString
+ values.iterator.toSeq.sortBy(_._1).foldLeft(NCAsciiTable("Key", "Value"))((t, p) ⇒ t += p).toString
/**
*
* @return
*/
def skipNlp(): Map[String, java.io.Serializable] =
- this.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key != "noteType" }.toMap
+ values.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key != "noteType" }
/**
*
*/
def asMetadata(): Map[String, java.io.Serializable] =
if (isUser)
- this.get("meta") match {
+ values.get("meta") match {
case Some(meta) ⇒ meta.asInstanceOf[Map[String, java.io.Serializable]]
case None ⇒ Map.empty[String, java.io.Serializable]
}
else {
val md = mutable.Map.empty[String, java.io.Serializable]
- val m = if (noteType != "nlpcraft:nlp") skipNlp() else this.toMap
+ val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
m.foreach { case (name, value) ⇒ md += (name.toLowerCase() → value)}
@@ -117,11 +114,23 @@ class NCNlpSentenceNote(
}
/**
+ *
+ * @param kvs
+ */
+ def clone(kvs : (String, java.io.Serializable)*): NCNlpSentenceNote = {
+ val m = mutable.HashMap.empty[String, java.io.Serializable] ++ values
+
+ kvs.foreach(kv ⇒ m += kv._1 → kv._2)
+
+ new NCNlpSentenceNote(m.toMap)
+ }
+
+ /**
*
* @return
*/
override def toString: String =
- this.toSeq.filter(_._1 != "unid").sortBy(t ⇒ { // Don't show internal ID.
+ values.toSeq.sortBy(t ⇒ { // Don't show internal ID.
val typeSort = t._1 match {
case "noteType" ⇒ 1
case _ ⇒ Math.abs(t._1.hashCode)
@@ -134,7 +143,6 @@ object NCNlpSentenceNote {
// These properties should be cloned as they are auto-set when new clone
// is created.
private final val SKIP_CLONE: Set[String] = Set(
- "unid",
"minIndex",
"maxIndex",
"wordIndexes",
@@ -148,19 +156,20 @@ object NCNlpSentenceNote {
private final val TOK_PREFIXES = Set("nlpcraft", "google", "opennlp", "stanford", "spacy")
- implicit def getValues(x: NCNlpSentenceNote): mutable.HashMap[String, java.io.Serializable] = x.values
+ /**
+ * To immutable map.
+ */
+ implicit def values(note: NCNlpSentenceNote): Map[String, java.io.Serializable] = note.values
/**
* Creates new note with given parameters.
*
- * @param id Internal ID.
* @param indexes Indexes in the sentence.
* @param wordIndexesOpt Word indexes. Optional.
* @param typ Type of the node.
* @param params Parameters.
*/
def apply(
- id: String,
indexes: Seq[Int],
wordIndexesOpt: Option[Seq[Int]],
typ: String,
@@ -172,7 +181,6 @@ object NCNlpSentenceNote {
val (sparsity, tokMinIndex, tokMaxIndex, tokWordIndexes, len) = calc(wordIndexesOpt.getOrElse(indexes))
new NCNlpSentenceNote(
- id,
mutable.HashMap[String, java.io.Serializable]((
params.filter(_._2 != null) :+
("noteType" → typ) :+
@@ -185,7 +193,7 @@ object NCNlpSentenceNote {
("wordLength" → len) :+
("sparsity" → sparsity) :+
("contiguous" → (sparsity == 0))
- ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]): _*)
+ ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]): _*).toMap
)
}
@@ -197,17 +205,16 @@ object NCNlpSentenceNote {
* @param params Parameters.
*/
def apply(indexes: Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(U.genGuid(), indexes, None, typ, params: _*)
+ apply(indexes, None, typ, params: _*)
/**
* Creates new note with given parameters.
*
- * @param id ID.
* @param indexes Indexes in the sentence.
* @param wordIndexes Word indexes in the sentence.
* @param typ Type of the node.
* @param params Parameters.
*/
- def apply(id: String, indexes: Seq[Int], wordIndexes: Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
- apply(id, indexes, Some(wordIndexes), typ, params: _*)
+ def apply(indexes: Seq[Int], wordIndexes: Seq[Int], typ: String, params: (String, Any)*): NCNlpSentenceNote =
+ apply(indexes, Some(wordIndexes), typ, params: _*)
}
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index be4c467..7be0bee 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -28,7 +28,7 @@ import scala.language.implicitConversions
*/
case class NCNlpSentenceToken(
index: Int,
- notes: mutable.HashMap[String, NCNlpSentenceNote] = mutable.HashMap.empty[String, NCNlpSentenceNote],
+ private val notes: mutable.HashSet[NCNlpSentenceNote] = mutable.HashSet.empty[NCNlpSentenceNote],
stopsReasons: mutable.HashSet[NCNlpSentenceNote] = mutable.HashSet.empty[NCNlpSentenceNote]
) extends java.io.Serializable {
@transient
@@ -59,7 +59,7 @@ case class NCNlpSentenceToken(
*
* @param noteType Note type.
*/
- def getNotes(noteType: String): Iterable[NCNlpSentenceNote] = notes.values.filter(_.noteType == noteType)
+ def getNotes(noteType: String): Iterable[NCNlpSentenceNote] = notes.filter(_.noteType == noteType)
/**
* Clones note.
@@ -69,9 +69,9 @@ case class NCNlpSentenceToken(
NCNlpSentenceToken(
index,
{
- val m = mutable.HashMap.empty[String, NCNlpSentenceNote]
+ val m = mutable.HashSet.empty[NCNlpSentenceNote]
- notes.foreach { case (key, note) ⇒ m += key → note.clone() }
+ notes.foreach(n ⇒ m += n.clone())
m
},
@@ -87,21 +87,17 @@ case class NCNlpSentenceToken(
/**
* Removes note with given ID. No-op if ID wasn't found.
*
- * @param id Note ID.
+ * @param note Note.
*/
- def remove(id: String): Unit = notes -= id
+ def remove(note: NCNlpSentenceNote): Unit = notes.remove(note)
/**
- * Removes notes with given IDs. No-op if ID wasn't found.
- *
- * @param ids Note IDs.
+ * Tests whether or not this token contains note.
+ * It is important to convert notes to set each time,
+ * because otherwise note cannot be found because its content changed and its hashCode changed too.
+ * https://stackoverflow.com/questions/43553806/hashset-contains-returns-false-when-it-shouldnt/43554123
*/
- def remove(ids: Iterable[String]): Unit = notes --= ids
-
- /**
- * Tests whether or not this token contains note with given ID.
- */
- def contains(id: String): Boolean = notes.contains(id)
+ def contains(note: NCNlpSentenceNote): Boolean = notes.contains(note)
/**
*
@@ -139,7 +135,7 @@ case class NCNlpSentenceToken(
*/
def getNlpNote: NCNlpSentenceNote = {
if (nlpNote == null)
- nlpNote = notes.values.find(_.isNlp).orNull
+ nlpNote = notes.find(_.isNlp).orNull
nlpNote
}
@@ -172,25 +168,25 @@ case class NCNlpSentenceToken(
/**
* Adds element.
*
- * @param elem Element.
+ * @param note Element.
*/
- def add(elem: NCNlpSentenceNote): Unit = {
- notes += elem.id → elem
+ def add(note: NCNlpSentenceNote): Unit = {
+ val added = notes.add(note)
- if (elem.isNlp)
- nlpNote = elem
+ if (added && note.isNlp)
+ nlpNote = note
}
/**
* Simple word is a non synthetic word that's also not part of any domain-specific note type.
*/
- def isNlp: Boolean = this.forall(_.isNlp)
+ def isNlp: Boolean = notes.forall(_.isNlp)
/**
*
* @return
*/
- def isUser: Boolean = this.exists(_.isUser)
+ def isUser: Boolean = notes.exists(_.isUser)
/**
*
@@ -198,15 +194,13 @@ case class NCNlpSentenceToken(
*/
def addStopReason(reason: NCNlpSentenceNote): Unit = stopsReasons += reason
- /**
- *
- */
- def markAsStop(): Unit = getNlpNote += "stopWord" → true
-
override def toString: String =
- notes.values.toSeq.sortBy(t ⇒ (if (t.isNlp) 0 else 1, t.noteType)).mkString("NLP token [", "|", "]")
+ notes.toSeq.sortBy(t ⇒ (if (t.isNlp) 0 else 1, t.noteType)).mkString("NLP token [", "|", "]")
}
object NCNlpSentenceToken {
- implicit def toNotes(x: NCNlpSentenceToken): Iterable[NCNlpSentenceNote] = x.notes.values
+ /**
+ * To immutable iterator.
+ */
+ implicit def notes(x: NCNlpSentenceToken): Iterable[NCNlpSentenceNote] = x.notes.toSet
}
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpCoreManager.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpCoreManager.scala
index 0896af7..6dedbbd 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpCoreManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpCoreManager.scala
@@ -57,8 +57,6 @@ object NCNlpCoreManager extends NCService {
case _ ⇒ throw new AssertionError(s"Unexpected engine: ${Config.engine}")
}
- tokenizer.start()
-
super.start()
}
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpTokenizer.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpTokenizer.scala
index 5e551b3..3391ab2 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpTokenizer.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/core/NCNlpTokenizer.scala
@@ -17,12 +17,10 @@
package org.apache.nlpcraft.common.nlp.core
-import org.apache.nlpcraft.common.NCService
-
/**
* NLP tokenizer.
*/
-trait NCNlpTokenizer extends NCService {
+trait NCNlpTokenizer {
/**
* Tokenizes the sentence.
*
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/core/opennlp/NCOpenNlpTokenizer.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/core/opennlp/NCOpenNlpTokenizer.scala
index 042d245..6818dca 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/core/opennlp/NCOpenNlpTokenizer.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/core/opennlp/NCOpenNlpTokenizer.scala
@@ -19,10 +19,9 @@ package org.apache.nlpcraft.common.nlp.core.opennlp
import java.io.BufferedInputStream
-import io.opencensus.trace.Span
import opennlp.tools.tokenize.{Tokenizer, TokenizerME, TokenizerModel}
import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreToken, NCNlpTokenizer}
-import org.apache.nlpcraft.common.{NCService, _}
+import org.apache.nlpcraft.common._
import resource.managed
import scala.language.{implicitConversions, postfixOps}
@@ -30,21 +29,11 @@ import scala.language.{implicitConversions, postfixOps}
/**
* OpenNLP tokenizer implementation.
*/
-object NCOpenNlpTokenizer extends NCService with NCNlpTokenizer {
+object NCOpenNlpTokenizer extends NCNlpTokenizer {
private final val MODEL_PATH = "opennlp/en-token.bin"
- @volatile private var tokenizer: Tokenizer = _
-
- override def start(parent: Span = null): NCService = startScopedSpan("start", parent, "model" → MODEL_PATH) { _ ⇒
- tokenizer = managed(new BufferedInputStream(U.getStream(MODEL_PATH))) acquireAndGet { in ⇒
- new TokenizerME(new TokenizerModel(in))
- }
-
- super.start()
- }
-
- override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
- super.stop()
+ private val tokenizer: Tokenizer = managed(new BufferedInputStream(U.getStream(MODEL_PATH))) acquireAndGet { in ⇒
+ new TokenizerME(new TokenizerModel(in))
}
override def tokenize(sen: String): Seq[NCNlpCoreToken] =
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
index 5c4cd3a..55e233c 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/core/stanford/NCStanfordTokenizer.scala
@@ -20,14 +20,13 @@ package org.apache.nlpcraft.common.nlp.core.stanford
import java.io.StringReader
import edu.stanford.nlp.process.PTBTokenizer
-import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreToken, NCNlpTokenizer}
import scala.collection.JavaConverters._
/**
* Stanford tokenizer implementation.
*/
-object NCStanfordTokenizer extends NCService with NCNlpTokenizer {
+object NCStanfordTokenizer extends NCNlpTokenizer {
override def tokenize(sen: String): Seq[NCNlpCoreToken] = {
PTBTokenizer.newPTBTokenizer(new StringReader(sen)).
tokenize().
diff --git a/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 2a90afd..a0c3e15 100644
--- a/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -323,14 +323,14 @@ object NCTokenLogger extends LazyLogging {
s
// User tokens.
- case _ ⇒ s"unid=${getValue("unid")}"
+ case _ ⇒ ""
}
}
val v = if (sorted.lengthCompare(1) > 0) vals2String(sorted) else sorted.map(p ⇒ s"${p._2}").mkString(", ")
if (note.tokenFrom < note.tokenTo)
- s"$v ${s"<${note.tokenFrom} to ${note.tokenTo}, id=${note.id}>"}"
+ s"$v ${s"<${note.tokenFrom} to ${note.tokenTo}>"}"
else
s"$v"
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index a81cbbb..15093a5 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -216,8 +216,10 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace {
started = false
- if (probeThread != null)
+ if (probeThread != null) {
probeThread.interrupt()
+ probeThread.join()
+ }
logger.info("Embedded probe shutdown OK.")
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index d23ca79..050879a 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -265,6 +265,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
if (isStarted)
modelFactory.terminate()
+ models.clear()
+
super.stop()
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
index 0b538e5..a0fcb06 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
@@ -18,7 +18,6 @@
package org.apache.nlpcraft.probe.mgrs.nlp
import java.io.Serializable
-import java.util
import com.typesafe.scalalogging.LazyLogging
import io.opencensus.trace.Span
@@ -26,7 +25,6 @@ import org.apache.nlpcraft.common.nlp._
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.probe.mgrs.NCModelDecorator
-import scala.collection.JavaConverters._
import scala.collection.{Map, Seq}
import scala.language.implicitConversions
@@ -61,7 +59,7 @@ abstract class NCProbeEnricher extends NCService with LazyLogging {
val notes = pred match {
case Some(p) ⇒ h.filter(p)
- case None ⇒ h.map(n ⇒ n)
+ case None ⇒ h.map(p ⇒ p)
}
notes.filter(!_.isNlp).filter(n ⇒ h.index == n.tokenFrom && l.index == n.tokenTo).map(_.noteType).toSet
@@ -83,46 +81,13 @@ abstract class NCProbeEnricher extends NCService with LazyLogging {
/**
*
- * @param typ
- * @param refNoteName
- * @param refNoteVal
- * @param matched
- */
- protected def hasReference(typ: String, refNoteName: String, refNoteVal: String, matched: Seq[NCNlpSentenceToken]): Boolean =
- matched.forall(t ⇒
- t.isTypeOf(typ) && t.getNotes(typ).exists(n ⇒ n.get(refNoteName) match {
- case Some(s) ⇒ s.asInstanceOf[String] == refNoteVal
- case None ⇒ false
- })
- )
-
- /**
- *
- * @param typ
- * @param refNoteName
- * @param refNoteVals
- * @param matched
- */
- protected def hasReferences(typ: String, refNoteName: String, refNoteVals: Seq[String], matched: Seq[NCNlpSentenceToken]): Boolean =
- matched.forall(t ⇒
- t.isTypeOf(typ) && t.getNotes(typ).exists(n ⇒
- n.get(refNoteName) match {
- case Some(s) ⇒ s.asInstanceOf[util.List[String]].asScala.intersect(refNoteVals).nonEmpty
- case None ⇒ false
- }
- )
- )
-
- /**
- *
* Processes this NLP sentence.
*
* @param mdl Model decorator.
* @param ns NLP sentence to enrich.
* @param senMeta Sentence metadata.
* @param parent Span parent.
- * @return Flag which indicates was the sentence enriched or not.
*/
@throws[NCE]
- def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span): Boolean
+ def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span): Unit
}
\ No newline at end of file
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 12afc78..69eb11a 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -28,7 +28,7 @@ import org.apache.nlpcraft.common.NCErrorCodes._
import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.config.NCConfigurable
import org.apache.nlpcraft.common.debug.NCLogHolder
-import org.apache.nlpcraft.common.nlp.NCNlpSentence
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.impl.{NCModelImpl, NCTokenLogger, NCVariantImpl}
import org.apache.nlpcraft.model.intent.impl.NCIntentSolverInput
@@ -59,7 +59,7 @@ import scala.concurrent.ExecutionContext
* Probe enrichment manager.
*/
object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
- private final val MAX_NESTED_TOKENS = 256
+ private final val MAX_NESTED_TOKENS = 32
private final val EC = ExecutionContext.fromExecutor(
Executors.newFixedThreadPool(8 * Runtime.getRuntime.availableProcessors())
@@ -346,17 +346,22 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
}
val sensSeq = validNlpSens.flatMap(nlpSen ⇒ {
- // Independent from references.
+ // Independent of references.
NCDictionaryEnricher.enrich(mdlDec, nlpSen, senMeta, span)
NCSuspiciousNounsEnricher.enrich(mdlDec, nlpSen, senMeta, span)
NCStopWordEnricher.enrich(mdlDec, nlpSen, senMeta, span)
- def get(name: String, e: NCProbeEnricher): Option[NCProbeEnricher] =
- if (mdlDec.model.getEnabledBuiltInTokens.contains(name)) Some(e) else None
+ case class Holder(enricher: NCProbeEnricher, getNotes: () ⇒ Seq[NCNlpSentenceNote])
+
+ def get(name: String, e: NCProbeEnricher): Option[Holder] =
+ if (mdlDec.model.getEnabledBuiltInTokens.contains(name))
+ Some(Holder(e, () ⇒ nlpSen.flatten.filter(_.noteType == name)))
+ else
+ None
val loopEnrichers =
Seq(
- Some(NCModelEnricher),
+ Some(Holder(NCModelEnricher, () ⇒ nlpSen.flatten.filter(_.isUser))),
get("nlpcraft:aggregation", NCAggregationEnricher),
get("nlpcraft:sort", NCSortEnricher),
get("nlpcraft:limit", NCLimitEnricher),
@@ -372,14 +377,23 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
if (step >= MAX_NESTED_TOKENS)
throw new NCE(s"Stack overflow on nested tokens processing (> $MAX_NESTED_TOKENS).")
- val seq = loopEnrichers.map(e ⇒ e → e.enrich(mdlDec, nlpSen, senMeta, span)).
- flatMap { case (e, execRes) ⇒ if (execRes) Some(U.cleanClassName(e.getClass)) else None }
+ val res = loopEnrichers.map(h ⇒ {
+ def get(): Seq[NCNlpSentenceNote] = h.getNotes().sortBy(p ⇒ (p.tokenIndexes.head, p.noteType))
+
+ val notes1 = get()
+
+ h → h.enricher.enrich(mdlDec, nlpSen, senMeta, span)
+
+ val notes2 = get()
+
+ h.enricher → (notes1 == notes2)
+ }).toMap
- continue = seq.nonEmpty
+ continue = res.exists { case (_, same) ⇒ !same }
if (DEEP_DEBUG)
if (continue)
- logger.info(s"Enrichment iteration finished - more needed [step=$step, changed=${seq.mkString(", ")}]")
+ logger.info(s"Enrichment iteration finished - more needed [step=$step, changed=${res.keys.mkString(", ")}]")
else
logger.info(s"Enrichment finished [step=$step]")
}
@@ -389,9 +403,8 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
sortBy(p ⇒
p.map(p ⇒ {
val data = p.
- notes.
- filter(!_._2.isNlp).
- flatMap(_._2.values.map(p ⇒ Objects.toString(p._2))).
+ filter(!_.isNlp).
+ map(Objects.toString).
toSeq.
sorted.
mkString("|")
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
index 868b540..7884e27 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
@@ -76,18 +76,18 @@ object NCAggregationEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
- var changed: Boolean = false
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
tryToMatch(toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
+ //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
+ for (refNote ← m.refNotes) {
val note = NCNlpSentenceNote(
m.matched.map(_.index),
TOK_ID,
@@ -98,15 +98,11 @@ object NCAggregationEnricher extends NCProbeEnricher {
m.matched.foreach(_.add(note))
- changed = true
+ buf += toks.toSet
}
- if (changed)
- buf += toks.toSet
case None ⇒ // No-op.
}
-
- changed
}
/**
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
index c8dde78..b2c4137 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
@@ -54,27 +54,24 @@ object NCDictionaryEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
- val res = ns.exists(!_.getNlpNote.contains("dict"))
-
ns.foreach(t ⇒ {
// Dictionary.
val nlpNote = t.getNlpNote
-
- // Single letters seems suspiciously.
- nlpNote += "dict" → (NCDictionaryManager.contains(t.lemma) && t.lemma.length > 1)
-
- // English.
- nlpNote += "english" → t.origText.matches("""[\s\w\p{Punct}]+""")
-
- // Swearwords.
- nlpNote += "swear" → swearWords.contains(t.stem)
- })
- res
+ ns.fixNote(
+ nlpNote,
+ // Single letters seems suspiciously.
+ "dict" → (NCDictionaryManager.contains(t.lemma) && t.lemma.length > 1),
+ // English.
+ "english" → t.origText.matches("""[\s\w\p{Punct}]+"""),
+ // Swearwords.
+ "swear" → swearWords.contains(t.stem)
+ )
+ })
}
}
\ No newline at end of file
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index a5e00e4..4b2648f 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -183,13 +183,11 @@ object NCLimitEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
- var changed: Boolean = false
-
val numsMap = NCNumericManager.find(ns).filter(_.unit.isEmpty).map(p ⇒ p.tokens → p).toMap
val groupsMap = groupNums(ns, numsMap.values)
@@ -200,33 +198,25 @@ object NCLimitEnricher extends NCProbeEnricher {
for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)) if areSuitableTokens(buf, toks))
tryToMatch(numsMap, groupsMap, toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
- val note = NCNlpSentenceNote(
- m.matched.map(_.index),
- TOK_ID,
- Seq(
- "limit" → m.limit,
- "asc" →
- (m.asc match {
- case Some(a) ⇒ a
- case None ⇒ null
- }),
- "indexes" → m.refIndexes,
- "note" → refNote
- ).filter(_._2 != null): _*
- )
+ //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
+ for (refNote ← m.refNotes) {
+ val params = mutable.ArrayBuffer.empty[(String, Any)]
- m.matched.foreach(_.add(note))
+ params += "limit" → m.limit
+ params += "indexes" → m.refIndexes
+ params += "note" → refNote
- changed = true
- }
+ if (m.asc.isDefined)
+ params += "asc" → m.asc.get
+
+ val note = NCNlpSentenceNote(m.matched.map(_.index), TOK_ID, params: _*)
+
+ m.matched.foreach(_.add(note))
- if (changed)
buf += toks.toSet
+ }
case None ⇒ // No-op.
}
-
- changed
}
/**
*
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index cfb2cc9..a3c08ce 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -170,6 +170,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
/**
*
+ * @param ns
* @param elem
* @param toks
* @param direct
@@ -178,6 +179,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param parts
*/
private def mark(
+ ns: NCNlpSentence,
elem: NCElement,
toks: Seq[NCNlpSentenceToken],
direct: Boolean,
@@ -225,7 +227,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
toks.foreach(_.add(note))
// For NLP elements.
- toks.foreach(_.getNlpNote += "direct" → direct)
+ toks.foreach(t ⇒ ns.fixNote(t.getNlpNote, "direct" → direct))
}
/**
@@ -297,7 +299,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
@@ -305,7 +307,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val jiggleFactor = mdl.model.getJiggleFactor
val cache = mutable.HashSet.empty[Seq[Int]]
val matches = ArrayBuffer.empty[ElementMatch]
- var changed: Boolean = false
/**
* Gets sequence of synonyms sorted in descending order by their weight, i.e. first synonym in
@@ -429,9 +430,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
val tokIdxs = m.tokens.map(_.index)
val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
- changed = true
-
- mark(elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
+ mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
}
val parsers = mdl.model.getParsers
@@ -497,10 +496,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
).getOrElse(throw new AssertionError(s"Custom model parser returned an invalid custom token: $w"))
)
- if (!alreadyMarked(matchedToks, elemId)) {
- changed = true
-
+ if (!alreadyMarked(matchedToks, elemId))
mark(
+ ns,
elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
toks = matchedToks,
direct = true,
@@ -508,13 +506,10 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
metaOpt = Some(e.getMetadata.asScala),
parts = Seq.empty
)
- }
})
}
parser.onDiscard()
}
-
- changed
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index c7d94c5..939a43a 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -200,19 +200,14 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
*
* @param ns Sentence.
* @param notNlpTypes Token types.
- * @param idCache ID cache.
*/
- private def collapse(
- ns: NCNlpSentence,
- notNlpTypes: Seq[String],
- idCache: mutable.HashMap[String, String]
- ): Boolean = {
+ private def collapse(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
ns.
filter(!_.isNlp).
filter(_.isStopWord).
flatten.
filter(_.isNlp).
- foreach(_ += "stopWord" → false)
+ foreach(n ⇒ ns.fixNote(n, "stopWord" → false))
val nsNotes: Map[String, Seq[Int]] = ns.tokens.flatten.map(p ⇒ p.noteType → p.tokenIndexes).toMap
@@ -221,13 +216,13 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
stopReason ← t.stopsReasons
if nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
)
- t.markAsStop()
+ ns.fixNote(t.getNlpNote, "stopWord" → true)
val history = mutable.ArrayBuffer.empty[(Int, Int)]
- notNlpTypes.foreach(typ ⇒ zipNotes(ns, typ, notNlpTypes, history, idCache))
+ notNlpTypes.foreach(typ ⇒ zipNotes(ns, typ, notNlpTypes, history))
- unionStops(ns, notNlpTypes, history, idCache)
+ unionStops(ns, notNlpTypes, history)
val res =
Seq("nlpcraft:aggregation", "nlpcraft:relation", "nlpcraft:limit").
@@ -238,9 +233,8 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (res)
// Validation (all indexes calculated well)
require(
- !ns.
- flatten.
- exists(n ⇒ ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(p ⇒ !p.contains(n.id))),
+ !ns.flatten.
+ exists(n ⇒ ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t ⇒ !t.contains(n))),
s"Invalid sentence:\n" +
ns.map(t ⇒
// Human readable invalid sentence for debugging.
@@ -258,10 +252,10 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
* @param ns
* @param idxs
* @param notesType
- * @param id
+ * @param note
* @return
*/
- private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, id: String): Boolean = {
+ private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = {
val types =
idxs.flatMap(idx ⇒ {
val types = ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)
@@ -296,7 +290,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (types.size == 1)
false
else {
- ns.removeNote(id)
+ ns.removeNote(note)
true
}
@@ -324,7 +318,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
fixed = fixed.distinct
if (idxs != fixed) {
- n += "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable]
+ ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable])
def x(seq: Seq[Int]): String = s"[${seq.mkString(", ")}]"
@@ -335,7 +329,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
)
ns.flatMap(_.getNotes(noteType)).forall(
- n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String]("note"), n.id)
+ n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String]("note"), n)
)
}
@@ -369,7 +363,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (fixed.forall(_.size == 1)) {
// Fix double dimension array to one dimension,
// so it should be called always inspite of fixIndexesReferences method.
- n += idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable]
+ ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable])
def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
@@ -389,7 +383,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
require(idxsList.size() == notesTypes.size())
idxsList.asScala.zip(notesTypes.asScala).forall {
- case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel.id)
+ case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel)
}
case None ⇒ true
}
@@ -403,14 +397,12 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
* @param nType Notes type.
* @param userNotesTypes Notes types.
* @param history Indexes transformation history.
- * @param idCache ID cache.
*/
private def zipNotes(
ns: NCNlpSentence,
nType: String,
userNotesTypes: Seq[String],
- history: mutable.ArrayBuffer[(Int, Int)],
- idCache: mutable.HashMap[String, String]
+ history: mutable.ArrayBuffer[(Int, Int)]
): Unit = {
val nts = ns.getNotes(nType).filter(n ⇒ n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom)
@@ -431,7 +423,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (!buf.contains(n.tokenFrom)) {
buf += n.tokenFrom
- ns += mkCompound(ns, nsCopyToks, n.tokenIndexes, stop = false, ns.size, Some(n), history, idCache)
+ ns += mkCompound(ns, nsCopyToks, n.tokenIndexes, stop = false, ns.size, Some(n), history)
}
case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
}
@@ -446,13 +438,11 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
* @param ns Sentence.
* @param userNoteTypes Notes types.
* @param history Indexes transformation history.
- * @param idCache ID cache.
*/
private def unionStops(
ns: NCNlpSentence,
userNoteTypes: Seq[String],
- history: mutable.ArrayBuffer[(Int, Int)],
- idCache: mutable.HashMap[String, String]
+ history: mutable.ArrayBuffer[(Int, Int)]
): Unit = {
// Java collection used because using scala collections (mutable.Buffer.empty[mutable.Buffer[Token]]) is reason
// Of compilation errors which seems as scala compiler internal error.
@@ -485,7 +475,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (!buf.contains(idxs.head)) {
buf += idxs.head
- ns += mkCompound(ns, nsCopyToks, idxs, stop = true, ns.size, None, history, idCache)
+ ns += mkCompound(ns, nsCopyToks, idxs, stop = true, ns.size, None, history)
}
case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
}
@@ -519,20 +509,18 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]) {
// Replaces other notes indexes.
for (t ← userNoteTypes :+ "nlpcraft:nlp"; note ← ns.getNotes(t)) {
- val id = note.id
-
- val toks = ns.filter(_.contains(id)).sortBy(_.index)
+ val toks = ns.filter(_.contains(note)).sortBy(_.index)
val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).sorted)
toks.foreach(t ⇒ {
- t.remove(id)
+ t.remove(note)
t.add(newNote)
})
}
// Special case - field index of core NLP note.
- ns.zipWithIndex.foreach { case (tok, idx) ⇒ tok.getNlpNote += "index" → idx }
+ ns.zipWithIndex.foreach { case (tok, idx) ⇒ ns.fixNote(tok.getNlpNote, "index" → idx) }
}
/**
@@ -545,7 +533,6 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
* @param idx Index.
* @param commonNote Common note.
* @param history Indexes transformation history.
- * @param idCache ID cache.
*/
private def mkCompound(
ns: NCNlpSentence,
@@ -554,8 +541,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
stop: Boolean,
idx: Int,
commonNote: Option[NCNlpSentenceNote],
- history: mutable.ArrayBuffer[(Int, Int)],
- idCache: mutable.HashMap[String, String]
+ history: mutable.ArrayBuffer[(Int, Int)]
): NCNlpSentenceToken = {
val t = NCNlpSentenceToken(idx)
@@ -613,27 +599,14 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
"swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
)
- val complexId = content.map(_.getNlpNote.id).mkString(" ")
-
- val id =
- idCache.get(complexId) match {
- case Some(cachedId) ⇒ cachedId
- case None ⇒
- val id = U.genGuid()
-
- idCache += complexId → id
-
- id
- }
-
- val nlpNote = NCNlpSentenceNote(id, idxs, wordIdxs, "nlpcraft:nlp", params: _*)
+ val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*)
t.add(nlpNote)
// Adds processed note with fixed indexes.
commonNote match {
case Some(n) ⇒
- ns.removeNote(n.id)
+ ns.removeNote(n)
t.add(n.clone(idxs, wordIdxs))
case None ⇒ // No-op.
}
@@ -658,7 +631,6 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
// Some words with same note type can be detected various ways.
// We keep only one variant - with `best` direct and sparsity parameters,
// other variants for these words are redundant.
- val idCache = mutable.HashMap.empty[String, String]
val redundant: Seq[NCNlpSentenceNote] =
ns.flatten.filter(!_.isNlp).distinct.
@@ -677,7 +649,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
flatMap(_.drop(1)).
toSeq
- redundant.map(_.id).foreach(ns.removeNote)
+ redundant.foreach(ns.removeNote)
def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
toks.flatten.filter(!_.isNlp).distinct
@@ -711,7 +683,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
if (!deleted.exists(_.forall(delComb.contains))) {
val nsClone = ns.clone()
- delComb.map(_.id).foreach(nsClone.removeNote)
+ delComb.foreach(nsClone.removeNote)
// Has overlapped notes for some tokens.
require(!nsClone.exists(_.count(!_.isNlp) > 1))
@@ -720,7 +692,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
val notNlpTypes = getNotNlpNotes(nsClone).map(_.noteType).distinct
- if (collapse(nsClone, notNlpTypes, idCache)) Some(nsClone) else None
+ if (collapse(nsClone, notNlpTypes)) Some(nsClone) else None
}
else
None
@@ -729,28 +701,25 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
// Removes sentences which have only one difference - 'direct' flag of their user tokens.
// `Direct` sentences have higher priority.
case class Key(
- sysNotes: Seq[mutable.HashMap[String, java.io.Serializable]],
- userNotes: Seq[mutable.HashMap[String, java.io.Serializable]]
+ sysNotes: Seq[Map[String, java.io.Serializable]],
+ userNotes: Seq[Map[String, java.io.Serializable]]
)
case class Value(sentence: NCNlpSentence, directCount: Int)
val m = mutable.HashMap.empty[Key, Value]
sens.map(sen ⇒ {
- val sysNotes = sen.flatten.filter(_.isSystem)
- val nlpNotes = sen.flatten.filter(_.isNlp)
- val userNotes = sen.flatten.filter(_.isUser)
+ val notes = sen.flatten
- def get(seq: Seq[NCNlpSentenceNote], keys2Skip: String*): Seq[mutable.HashMap[String, java.io.Serializable]] =
- seq.map(p ⇒ {
- val m: mutable.HashMap[String, java.io.Serializable] = p.clone()
+ val sysNotes = notes.filter(_.isSystem)
+ val nlpNotes = notes.filter(_.isNlp)
+ val userNotes = notes.filter(_.isUser)
+ def get(seq: Seq[NCNlpSentenceNote], keys2Skip: String*): Seq[Map[String, java.io.Serializable]] =
+ seq.map(p ⇒
// We have to delete some keys to have possibility to compare sentences.
- m.remove("unid")
- m.remove("direct")
-
- m
- })
+ p.clone().filter(_._1 != "direct")
+ )
(Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
}).
@@ -767,7 +736,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
m.values.map(_.sentence).toSeq
}
else {
- if (collapse(ns, getNotNlpNotes(ns).map(_.noteType).distinct, idCache)) Seq(ns) else Seq.empty
+ if (collapse(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Seq(ns) else Seq.empty
}.distinct
sens.foreach(sen ⇒
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 988cc18..7fc2265 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -116,12 +116,11 @@ object NCRelationEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
- var changed: Boolean = false
val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
// Tries to grab tokens direct way.
@@ -129,7 +128,8 @@ object NCRelationEnricher extends NCProbeEnricher {
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
tryToMatch(toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, Seq(m.matched.head))) {
+ //for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, Seq(m.matched.head))) {
+ for (refNote ← m.refNotes) {
val note = NCNlpSentenceNote(
Seq(m.matchedHead.index),
TOK_ID,
@@ -138,19 +138,14 @@ object NCRelationEnricher extends NCProbeEnricher {
"note" → refNote
)
- m.matchedHead.add(note)
-
m.matched.filter(_ != m.matchedHead).foreach(_.addStopReason(note))
- changed = true
- }
+ m.matchedHead.add(note)
- if (changed)
buf += toks.toSet
+ }
case None ⇒ // No-op.
}
-
- changed
}
/**
@@ -182,7 +177,7 @@ object NCRelationEnricher extends NCProbeEnricher {
if (suitNotes.nonEmpty)
Some(
Reference(
- toks.filter(t ⇒ suitNotes.exists(t.notes.values.toSet.contains)),
+ toks.filter(t ⇒ suitNotes.exists(t.contains)),
suitNotes.map(_.noteType).toSet
)
)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 5f2253c..5150c9e 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -323,18 +323,18 @@ object NCSortEnricher extends NCProbeEnricher {
}
}
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
- var changed: Boolean = false
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
tryToMatch(toks) match {
case Some(m) ⇒
- for (subj ← m.subjSeq if !hasReferences(TOK_ID, "subjNotes", subj.map(_.note), m.main)) {
+ //for (subj ← m.subjSeq if !hasReferences(TOK_ID, "subjNotes", subj.map(_.note), m.main)) {
+ for (subj ← m.subjSeq) {
def addNotes(
params: ArrayBuffer[(String, Any)],
seq: Seq[NoteData],
@@ -362,7 +362,7 @@ object NCSortEnricher extends NCProbeEnricher {
m.main.foreach(_.add(note))
m.stop.foreach(_.addStopReason(note))
- changed = true
+ buf += toks.toSet
}
if (m.bySeq.nonEmpty)
@@ -371,13 +371,8 @@ object NCSortEnricher extends NCProbeEnricher {
else
mkNote(mkParams())
}
-
- if (changed)
- buf += toks.toSet
case None ⇒ // No-op.
}
-
- changed
}
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 5507996..defb533 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -86,33 +86,20 @@ object NCStopWordEnricher extends NCProbeEnricher {
* @param sen Sentence.
* @param noteType Note type.
*/
- private def markBefore(sen: NCNlpSentence, noteType: String): Boolean = {
- var res = false
-
- for (note ← sen.getNotes(noteType) if note.tokenFrom > 0) {
- val part = sen.
+ private def markBefore(sen: NCNlpSentence, noteType: String): Unit =
+ for (note ← sen.getNotes(noteType) if note.tokenFrom > 0)
+ sen.
take(note.tokenFrom).
reverse.
takeWhile(t ⇒ t.isStopWord || t.isNlp && POSES.contains(t.pos)).
- filter(!_.isStopWord)
-
- if (part.nonEmpty) {
- part.foreach(_.addStopReason(note))
-
- res = true
- }
- }
-
- res
- }
+ filter(!_.isStopWord).foreach(_.addStopReason(note))
/**
* Processes geo tokens. Sets additional stopwords.
*
* @param ns Sentence.
*/
- private def processGeo(ns: NCNlpSentence): Boolean = {
- var res = false
+ private def processGeo(ns: NCNlpSentence): Unit = {
// 1. Marks some specific words before GEO (like 'origin for London')
for (note ← GEO_TYPES.flatMap(ns.getNotes)) {
@@ -126,14 +113,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
val stems = toks.map(_.stem)
GEO_PRE_WORDS.find(stems.endsWith) match {
- case Some(words) ⇒
- val part = toks.reverse.take(words.size).filter(!_.isStopWord)
-
- if (part.nonEmpty) {
- res = true
-
- part.foreach(_.addStopReason(note))
- }
+ case Some(words) ⇒ toks.reverse.take(words.size).filter(!_.isStopWord).foreach(_.addStopReason(note))
case None ⇒ // No-op.
}
}
@@ -144,11 +124,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
for (geoNote ← ns.getNotes(typ)) {
def process(toks: Seq[NCNlpSentenceToken]): Unit =
toks.find(!_.isStopWord) match {
- case Some(t) ⇒ if (stops.contains(t.stem)) {
- res = true
-
- t.addStopReason(geoNote)
- }
+ case Some(t) ⇒ if (stops.contains(t.stem)) t.addStopReason(geoNote)
case None ⇒ // No-op.
}
@@ -158,12 +134,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
}
// 3. Marks stop-words like prepositions before.
- GEO_TYPES.foreach(t ⇒ {
- if (markBefore(ns, t))
- res = true
- })
-
- res
+ GEO_TYPES.foreach(t ⇒ markBefore(ns, t))
}
/**
@@ -171,9 +142,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
*
* @param ns Sentence.
*/
- private def processNums(ns: NCNlpSentence): Boolean = {
- var res = false
-
+ private def processNums(ns: NCNlpSentence): Unit =
// Try to find words from configured list before numeric condition and mark them as STOP words.
ns.getNotes("nlpcraft:num").foreach(numNote ⇒ {
val before = ns.filter(_.index < numNote.tokenFrom)
@@ -185,39 +154,27 @@ object NCStopWordEnricher extends NCProbeEnricher {
(!t.isBracketed && !t.isQuoted)) &&
NUM_PREFIX_STOPS.contains(seq.filter(!_.isStopWord).map(_.stem).mkString(" "))
) match {
- case Some(seq) ⇒
- val toks = seq.filter(!_.isStopWord)
-
- if (toks.nonEmpty) {
- res = true
-
- toks.foreach(_.addStopReason(numNote))
- }
+ case Some(seq) ⇒ seq.filter(!_.isStopWord).foreach(_.addStopReason(numNote))
case None ⇒ // No-op.
}
})
- res
- }
-
/**
* Processes dates. Sets additional stopwords.
*
* @param ns Sentence.
*/
- private def processDate(ns: NCNlpSentence): Boolean = markBefore(ns, "nlpcraft:date")
+ private def processDate(ns: NCNlpSentence): Unit = markBefore(ns, "nlpcraft:date")
/**
* Marks as stopwords, words with POS from configured list, which also placed before another stop words.
*/
- private def processCommonStops(mdl: NCModelDecorator, ns: NCNlpSentence): Boolean = {
- var res = false
-
+ private def processCommonStops(mdl: NCModelDecorator, ns: NCNlpSentence): Unit = {
/**
* Marks as stopwords, words with POS from configured list, which also placed before another stop words.
*/
@tailrec
- def processCommonStops0(mdl: NCModelDecorator, ns: NCNlpSentence): Boolean = {
+ def processCommonStops0(mdl: NCModelDecorator, ns: NCNlpSentence): Unit = {
val max = ns.size - 1
var stop = true
@@ -229,44 +186,31 @@ object NCStopWordEnricher extends NCProbeEnricher {
POSES.contains(tok.pos) &&
ns(idx + 1).isStopWord
) {
- tok.markAsStop()
-
- res = true
+ ns.fixNote(tok.getNlpNote, "stopWord" → true)
stop = false
}
- if (stop) true else processCommonStops0(mdl, ns)
+ if (!stop)
+ processCommonStops0(mdl, ns)
}
processCommonStops0(mdl, ns)
-
- res
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean = {
- def mark(stems: Set[String], f: Boolean): Boolean = {
- val part = ns.filter(t ⇒ stems.contains(t.stem))
-
- if (part.nonEmpty) {
- part.foreach(_.getNlpNote += "stopWord" → f)
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ def mark(stems: Set[String], f: Boolean): Unit =
+ ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "stopWord" → f))
- true
- }
- else
- false
- }
-
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "modelId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
- Seq(
- mark(mdl.excludedStopWordsStems, f = false),
- mark(mdl.additionalStopWordsStems, f = true),
- processGeo(ns),
- processDate(ns),
- processNums(ns),
- processCommonStops(mdl, ns)
- ).contains(true)
+
+ mark(mdl.excludedStopWordsStems, f = false)
+ mark(mdl.additionalStopWordsStems, f = true)
+ processGeo(ns)
+ processDate(ns)
+ processNums(ns)
+ processCommonStops(mdl, ns)
}
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
index 0798d06..be05916 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
@@ -40,19 +40,11 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Boolean =
+ override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"modelId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
- val suspToks = ns.filter(t ⇒ mdl.suspiciousWordsStems.contains(t.stem))
-
- if (suspToks.nonEmpty) {
- suspToks.foreach(_.getNlpNote += "suspNoun" → true)
-
- true
- }
- else
- false
+ ns.filter(t ⇒ mdl.suspiciousWordsStems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "suspNoun" → true))
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpNerEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpNerEnricher.scala
index 5b4e96c..c5f8c94 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpNerEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpNerEnricher.scala
@@ -68,18 +68,11 @@ object NCOpenNlpNerEnricher extends NCService with NCNlpNerEnricher with NCIgnit
catching(wrapIE) {
cache = ignite.cache[String, Array[String]]("opennlp-cache")
}
-
- // Should be started even another NLP engine configured.
- if (!NCOpenNlpTokenizer.isStarted)
- NCOpenNlpTokenizer.start()
-
+
super.start()
}
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
- if (NCOpenNlpTokenizer.isStarted)
- NCOpenNlpTokenizer.stop(parent)
-
cache = null
super.stop()
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpParser.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpParser.scala
index ff5b4db..b04cf93 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpParser.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/core/opennlp/NCOpenNlpParser.scala
@@ -61,17 +61,10 @@ object NCOpenNlpParser extends NCService with NCNlpParser with NCIgniteInstance
cache = ignite.cache[String, Array[String]]("opennlp-cache")
}
- // Should be started even another NLP engine configured.
- if (!NCOpenNlpTokenizer.isStarted)
- NCOpenNlpTokenizer.start()
-
super.start()
}
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
- if (NCOpenNlpTokenizer.isStarted)
- NCOpenNlpTokenizer.stop(parent)
-
cache = null
super.stop()
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index 8b8bce1..5908f1a 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -461,9 +461,7 @@ object NCDateEnricher extends NCServerEnricher {
seq.foreach(n ⇒ {
val r = convertRange(mkDateRange(n))
- n += "from" → r.from
- n += "to" → r.to
- n += "periods" → new util.ArrayList[String]()
+ ns.fixNote(n, "from" → r.from, "to" → r.to, "periods" → new util.ArrayList[String]())
})
def optHolder(b: Boolean) = if (b) Some(base) else None
@@ -548,9 +546,9 @@ object NCDateEnricher extends NCServerEnricher {
before: Option[NCNlpSentenceNote] = None,
after: Option[NCNlpSentenceNote] = None) {
if (!compressNotes(ns, seq, before, after)) {
- def remove(hOpt: Option[NCNlpSentenceNote]): Unit =
- hOpt match {
- case Some(h) ⇒ ns.removeNote(h.id)
+ def remove(nOpt: Option[NCNlpSentenceNote]): Unit =
+ nOpt match {
+ case Some(h) ⇒ ns.removeNote(h)
case None ⇒ // No-op.
}
@@ -594,7 +592,7 @@ object NCDateEnricher extends NCServerEnricher {
}
private def removeDuplicates(ns: NCNlpSentence): Unit = {
- val ids = findNeighbours(ns, andSupport = false).flatMap(g ⇒ {
+ val notes = findNeighbours(ns, andSupport = false).flatMap(g ⇒ {
case class H(from: Long, to: Long) {
override def equals(obj: scala.Any): Boolean = obj match {
case v: H ⇒ v.from == from && v.to == to
@@ -612,10 +610,10 @@ object NCDateEnricher extends NCServerEnricher {
grouped.map(_._2.sortBy(h ⇒ -h("periods").asInstanceOf[java.util.List[String]].asScala.length))
// First holder will be kept in group, others (tail) should be deleted.
- hs.map(_.tail).flatMap(_.map(_.id))
+ hs.flatMap(_.tail)
})
- ids.foreach(ns.removeNote)
+ notes.foreach(ns.removeNote)
}
private def mkCalendar(d: Long) = {
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
index 8325172..dc93989 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
@@ -180,7 +180,7 @@ object NCGeoEnricher extends NCServerEnricher {
// Other types(JJ etc) and quoted word are not re-marked.
toks.filter(t ⇒ !NCPennTreebank.NOUNS_POS.contains(t.pos) && t.pos != "FW").
- foreach(t ⇒ t.getNlpNote += "pos" → NCPennTreebank.SYNTH_POS)
+ foreach(t ⇒ ns.fixNote(t.getNlpNote, "pos" → NCPennTreebank.SYNTH_POS))
}
LOCATIONS.get(toks.map(_.normText).mkString(" ")) match {
@@ -331,8 +331,7 @@ object NCGeoEnricher extends NCServerEnricher {
// Also added tokens with very short GEO names (with length is 1)
excls ++= getGeoNotes(ns).filter(note ⇒ getName(extractKind(note), note).length == 1)
- def removeNote(n: NCNlpSentenceNote): Unit =
- ns.removeNote(n.id)
+ def removeNote(n: NCNlpSentenceNote): Unit = ns.removeNote(n)
// Check that city is inside country or region.
// When true - remove larger location note and replace with
@@ -464,8 +463,7 @@ object NCGeoEnricher extends NCServerEnricher {
val sortedByKind = sorted.groupBy(_.kind)
// Keeps best candidates for each GEO kind.
- val remainHs = sortedByKind.
- unzip._2.
+ val remainHs = sortedByKind.values.
flatMap(hsByKind ⇒ Seq(hsByKind.head) ++ hsByKind.tail.filter(_.weight == hsByKind.head.weight)).
toSeq
@@ -476,6 +474,6 @@ object NCGeoEnricher extends NCServerEnricher {
// Drops GEO notes which are not included into enabled built-in token list.
// We can't do it before (or just ignore notes which are not from enabled list)
// because GEO notes with different types influence on each other during processing.
- GEO_TYPES.diff(ns.enabledBuiltInToks).flatMap(ns.getNotes).map(_.id).foreach(ns.removeNote)
+ GEO_TYPES.diff(ns.enabledBuiltInToks).flatMap(ns.getNotes).foreach(ns.removeNote)
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
index 6ef6719..2d97843 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
@@ -133,7 +133,7 @@ object NCQuoteEnricher extends NCServerEnricher {
val nlpNote = newTok.getNlpNote
// NLP is single note.
- newTok.remove(nlpNote.id)
+ newTok.remove(nlpNote)
newTok.add(nlpNote.clone(Seq(tokIdx), Seq(tokIdx), "index" → tokIdx, "quoted" → false))
// It shouldn't care about other kind of notes because
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
index c07fad4..92168ba 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -389,7 +389,7 @@ object NCStopWordEnricher extends NCServerEnricher {
!isException(Seq(tok)) &&
stopPoses.contains(tok.pos) &&
ns(idx + 1).isStopWord) {
- tok.markAsStop()
+ ns.fixNote(tok.getNlpNote, "stopWord" → true)
stop = false
}
@@ -457,7 +457,7 @@ object NCStopWordEnricher extends NCServerEnricher {
val newTok = tok.clone(idx)
def replace(nt: String): Unit =
- newTok.getNotes(nt).map(n ⇒ (n.id, n.clone(Seq(idx), Seq(idx)))).foreach(p ⇒ {
+ newTok.getNotes(nt).map(n ⇒ (n, n.clone(Seq(idx), Seq(idx)))).foreach(p ⇒ {
newTok.remove(p._1)
newTok.add(p._2)
})
@@ -466,9 +466,9 @@ object NCStopWordEnricher extends NCServerEnricher {
replace("nlpcraft:nlp")
// NLP note special case because has index field.
- newTok.getNlpNote += "index" → idx
-
ns += newTok
+
+ ns.fixNote(newTok.getNlpNote, "index" → idx)
}
if (isRBR(tok) && !tok.isQuoted)
@@ -597,7 +597,7 @@ object NCStopWordEnricher extends NCServerEnricher {
// be, was, is etc. or have done etc.
isCommonVerbs("have", "do")
if (stop)
- tok.markAsStop()
+ ns.fixNote(tok.getNlpNote, "stopWord" → true)
}
// +--------------------------------------+
// | Pass #3. |
@@ -607,7 +607,7 @@ object NCStopWordEnricher extends NCServerEnricher {
val mix = ns.tokenMixWithStopWords()
for (toks ← mix if !buf.exists(_.containsSlice(toks)) && isStop(toks) && !isException(toks)) {
- toks.foreach(_.markAsStop())
+ toks.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
buf += toks
}
@@ -619,7 +619,7 @@ object NCStopWordEnricher extends NCServerEnricher {
// | Check external possessive stop-word file. |
// +--------------------------------------------+
for (tup ← origToks; key = tup._2 if POSSESSIVE_WORDS.contains(key) && !isException(tup._1))
- tup._1.foreach(_.markAsStop())
+ tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
// +--------------------------------------------------+
// | Pass #5. |
@@ -632,7 +632,7 @@ object NCStopWordEnricher extends NCServerEnricher {
val startToks = ns.takeWhile(_.isStopWord) ++ ns.find(!_.isStopWord).map(p ⇒ p)
for (startTok ← startToks; tup ← origToks.filter(_._1.head == startTok); key = tup._2
if FIRST_WORDS.contains(key) && !isException(tup._1)) {
- tup._1.foreach(_.markAsStop())
+ tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
foundKeys += key
}
@@ -642,7 +642,9 @@ object NCStopWordEnricher extends NCServerEnricher {
// +-------------------------------------------------+
for (tup ← origToks; key = tup._2 if !foundKeys.contains(key) && !isException(tup._1))
foundKeys.find(key.startsWith) match {
- case Some(s) ⇒ if (NOUN_WORDS.contains(key.substring(s.length).trim)) tup._1.foreach(_.markAsStop())
+ case Some(s) ⇒
+ if (NOUN_WORDS.contains(key.substring(s.length).trim))
+ tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
case None ⇒ ()
}
diff --git a/src/test/scala/org/apache/nlpcraft/model/intent/dsl/NCDslTest.java b/src/test/scala/org/apache/nlpcraft/model/intent/dsl/NCDslTest.java
index db71b35..8ea9cba 100644
--- a/src/test/scala/org/apache/nlpcraft/model/intent/dsl/NCDslTest.java
+++ b/src/test/scala/org/apache/nlpcraft/model/intent/dsl/NCDslTest.java
@@ -44,6 +44,8 @@ class NCDslTest {
@AfterEach
void tearDown() throws NCException, IOException {
cli.close();
+
+ NCEmbeddedProbe.stop();
}
@Test
diff --git a/src/test/scala/org/apache/nlpcraft/models/nested/NCNestedTestModel.scala b/src/test/scala/org/apache/nlpcraft/models/nested/NCNestedTestModel.scala
index 2137e09..f55815a 100644
--- a/src/test/scala/org/apache/nlpcraft/models/nested/NCNestedTestModel.scala
+++ b/src/test/scala/org/apache/nlpcraft/models/nested/NCNestedTestModel.scala
@@ -47,9 +47,9 @@ class NCNestedTestModel extends NCModelAdapter("nlpcraft.nested.test", "Nested E
@NCIntent("intent=nested term={id=='x:nested'}")
private def onNested(ctx: NCIntentMatch): NCResult = "nested"
- @NCIntent("intent=nested term={id=='x:nested1'}")
+ @NCIntent("intent=nested1 term={id=='x:nested1'}")
private def onNested1(ctx: NCIntentMatch): NCResult = "nested1"
- @NCIntent("intent=nested term={id=='x:nested2'}")
+ @NCIntent("intent=nested2 term={id=='x:nested2'}")
private def onNested2(ctx: NCIntentMatch): NCResult = "nested2"
}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
index c59174f..5a8784d 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -17,9 +17,9 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
+import org.apache.nlpcraft.model.NCModel
import org.apache.nlpcraft.model.tools.test.{NCTestClient, NCTestClientBuilder}
import org.apache.nlpcraft.probe.embedded.NCEmbeddedProbe
-
import org.junit.jupiter.api.Assertions.{assertTrue, fail}
import org.junit.jupiter.api.{AfterEach, BeforeEach}
import org.scalatest.Assertions
@@ -30,9 +30,21 @@ import org.scalatest.Assertions
class NCEnricherBaseSpec {
private var client: NCTestClient = _
+ // TODO:
+ def getModelClass[T <: NCModel]: Option[Class[NCModel]] = Some(classOf[NCEnricherTestModel].asInstanceOf[Class[NCModel]])
+
@BeforeEach
private[enrichers] def setUp(): Unit = {
- NCEmbeddedProbe.start(classOf[NCEnricherTestModel])
+ getModelClass match {
+ case Some(claxx) ⇒
+ println(s"Embedded probe is going to start with model: $claxx")
+
+ NCEmbeddedProbe.start(claxx)
+ case None ⇒
+ println("Embedded probe will not be started")
+
+ None
+ }
client = new NCTestClientBuilder().newBuilder.setResponseLog(false).build
@@ -40,7 +52,12 @@ class NCEnricherBaseSpec {
}
@AfterEach
- private[enrichers] def tearDown(): Unit = client.close()
+ private[enrichers] def tearDown(): Unit = {
+ if (client != null)
+ client.close()
+
+ NCEmbeddedProbe.stop()
+ }
/**
* Checks single variant.
@@ -112,9 +129,9 @@ class NCEnricherBaseSpec {
if (errs.nonEmpty) {
errs.foreach { case (err, i) ⇒
- System.err.println(s"${i + 1}. Test failed: ${err.getLocalizedMessage}")
+ println(s"${i + 1}. Test failed: ${err.getLocalizedMessage}")
- err.printStackTrace()
+ err.printStackTrace(System.out)
}
Assertions.fail(s"Failed ${errs.size} tests. See errors list above.")
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala
index 8023bdc..701372d 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala
@@ -20,11 +20,10 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
import java.util
import java.util.Collections
-import org.apache.nlpcraft.model.{NCContext, NCElement, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCModelAdapter, NCResult, NCValue}
import scala.collection.JavaConverters._
import scala.language.implicitConversions
-
import NCEnricherTestModel._
/**
@@ -42,7 +41,8 @@ class NCEnricherTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.
mkElement("BC", "B C"),
mkElement("ABC", "A B C"),
mkElement("D1", "D"),
- mkElement("D2", "D")
+ mkElement("D2", "D"),
+ mkValueElement("V", "V1", "V2")
).asJava
private def mkElement(id: String, syns: String*): NCElement =
@@ -52,6 +52,18 @@ class NCEnricherTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.
override def getGroups: util.List[String] = Collections.singletonList(GROUP)
}
+ private def mkValueElement(id: String, vals: String*): NCElement =
+ new NCElement {
+ override def getId: String = id
+ override def getSynonyms: util.List[String] = Collections.singletonList(id)
+ override def getGroups: util.List[String] = Collections.singletonList(GROUP)
+ override def getValues: util.List[NCValue] = vals.map(v ⇒ new NCValue {
+ override def getName: String = v
+ override def getSynonyms: util.List[String] = Collections.singletonList(v)
+ }).asJava
+ }
+
+
override def onContext(ctx: NCContext): NCResult =
NCResult.text(
NCTestSentence.serialize(ctx.getVariants.asScala.map(v ⇒ NCTestSentence(v.asScala.map(NCTestToken(_)))))
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala
index f97bc40..ef3fe41 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala
@@ -234,12 +234,17 @@ case class NCTestAggregationToken(text: String, `type`: String, indexes: Seq[Int
s", note=$note>"
}
+object NCTestAggregationToken {
+ def apply(text: String, `type`: String, index: Int, note: String): NCTestAggregationToken =
+ new NCTestAggregationToken(text, `type`, Seq(index), note)
+}
+
case class NCTestLimitToken(
text: String,
limit: Double,
indexes: Seq[Int],
note: String,
- asc: Option[Boolean] = None
+ asc: Option[Boolean]
) extends NCTestToken {
require(text != null)
require(indexes != null)
@@ -263,9 +268,24 @@ case class NCTestLimitToken(
}
}
+object NCTestLimitToken {
+ def apply(text: String, limit: Double, indexes: Seq[Int], note: String, asc: Boolean): NCTestLimitToken =
+ new NCTestLimitToken(text, limit, indexes, note, Some(asc))
+
+ def apply(text: String, limit: Double, indexes: Seq[Int], note: String): NCTestLimitToken =
+ new NCTestLimitToken(text, limit, indexes, note, None)
+
+ def apply(text: String, limit: Double, index: Int, note: String, asc: Boolean): NCTestLimitToken =
+ new NCTestLimitToken(text, limit, Seq(index), note, Some(asc))
+
+ def apply(text: String, limit: Double, index: Int, note: String): NCTestLimitToken =
+ new NCTestLimitToken(text, limit, Seq(index), note, None)
+}
+
case class NCTestUserToken(text: String, id: String) extends NCTestToken {
require(text != null)
require(id != null)
+
override def toString: String = s"$text(user)<id=$id>"}
// Token and sentence beans and utilities.
@@ -332,7 +352,7 @@ object NCTestToken {
case "nlpcraft:limit" ⇒
val indexes: java.util.List[Int] = t.meta("nlpcraft:limit:indexes")
- val asc: Optional[Boolean] = t.metaOpt("nlpcraft:sort:asc")
+ val asc: Optional[Boolean] = t.metaOpt("nlpcraft:limit:asc")
NCTestLimitToken(
txt,
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCEnricherAggregationSpec.scala
similarity index 70%
copy from src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
copy to src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCEnricherAggregationSpec.scala
index 7795bbc..f96a567 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCEnricherAggregationSpec.scala
@@ -15,25 +15,27 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.aggregation
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestLimitToken ⇒ lim, NCTestUserToken ⇒ usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestAggregationToken ⇒ agg, NCTestUserToken ⇒ usr}
import org.junit.jupiter.api.Test
/**
- * Limit enricher test.
+ * Aggregation enricher test.
*/
-class NCEnricherLimitSpec extends NCEnricherBaseSpec {
+class NCEnricherAggregationSpec extends NCEnricherBaseSpec {
/**
*
* @throws Exception
*/
@Test
def test(): Unit = {
- checkExists(
- "top 5 A",
- lim(text = "top 5", limit = 5, indexes = Seq(1), note = "A", asc = Some(true)),
- usr(text = "A", id = "A")
+ runBatch(
+ _ ⇒ checkExists(
+ "max A",
+ agg(text = "max", `type` = "max", index = 1, note = "A"),
+ usr(text = "A", id = "A")
+ )
)
}
}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
index 7795bbc..2ba3638 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
@@ -30,10 +30,22 @@ class NCEnricherLimitSpec extends NCEnricherBaseSpec {
*/
@Test
def test(): Unit = {
- checkExists(
- "top 5 A",
- lim(text = "top 5", limit = 5, indexes = Seq(1), note = "A", asc = Some(true)),
- usr(text = "A", id = "A")
+ runBatch(
+ _ ⇒ checkExists(
+ "top A",
+ lim(text = "top", limit = 10, index = 1, note = "A", asc = false),
+ usr(text = "A", id = "A")
+ ),
+ _ ⇒ checkExists(
+ "few A B",
+ lim(text = "few", limit = 3, index = 1, note = "AB", asc = false),
+ usr(text = "A B", id = "AB")
+ ),
+ _ ⇒ checkExists(
+ "top 10 D1",
+ lim(text = "top 10", limit = 10, index = 1, note = "D1", asc = false),
+ usr(text = "D1", id = "D1")
+ )
)
}
}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCEnricherRelationSpec.scala
similarity index 64%
copy from src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
copy to src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCEnricherRelationSpec.scala
index 7795bbc..8253217 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCEnricherRelationSpec.scala
@@ -15,25 +15,29 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.relation
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestLimitToken ⇒ lim, NCTestUserToken ⇒ usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestRelationToken ⇒ rel, NCTestUserToken ⇒ usr, NCTestNlpToken ⇒ nlp}
import org.junit.jupiter.api.Test
/**
- * Limit enricher test.
+ * Relation enricher test.
*/
-class NCEnricherLimitSpec extends NCEnricherBaseSpec {
+class NCEnricherRelationSpec extends NCEnricherBaseSpec {
/**
*
* @throws Exception
*/
@Test
def test(): Unit = {
- checkExists(
- "top 5 A",
- lim(text = "top 5", limit = 5, indexes = Seq(1), note = "A", asc = Some(true)),
- usr(text = "A", id = "A")
+ runBatch(
+ _ ⇒ checkExists(
+ "compare V1 and V2",
+ rel(text = "compare", `type` = "compare", indexes = Seq(1, 3), note = "V"),
+ usr(text = "V1", id = "V"),
+ nlp(text = "and", isStop = true),
+ usr(text = "V2", id = "V")
+ )
)
}
}