You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2020/03/17 22:19:20 UTC
[incubator-nlpcraft] branch master updated: Fix for NLPCRAFT-16.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 330f7b8 Fix for NLPCRAFT-16.
330f7b8 is described below
commit 330f7b826cfd0373d103a468a6224724cf750ce8
Author: Aaron Radzinzski <ar...@datalingvo.com>
AuthorDate: Tue Mar 17 15:19:07 2020 -0700
Fix for NLPCRAFT-16.
---
.../nlp/enrichers/post/NCPostEnrichProcessor.scala | 54 +--
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 91 ++---
.../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala | 125 +++++++
.../mgrs/nlp/enrichers/NCEnricherTestModel.scala | 65 ++++
.../mgrs/nlp/enrichers/NCEnrichersTestBeans.scala | 378 +++++++++++++++++++++
.../nlp/enrichers/limit/NCEnricherLimitSpec.scala | 39 +++
.../nlp/enrichers/sort/NCEnricherSortSpec.scala | 143 ++++++++
7 files changed, 831 insertions(+), 64 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 1ecfb14..c7d94c5 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -132,9 +132,11 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
(
p.wordIndexes,
p.noteType,
- p.get("asc"),
- p.get("indexes"),
- p.get("note")
+ p.get("subjNotes"),
+ p.get("subjIndexes"),
+ p.getOrElse("byNotes", null),
+ p.getOrElse("byIndexes", null),
+ p.getOrElse("asc", null)
)
case "nlpcraft:limit" ⇒
(
@@ -245,6 +247,9 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
s"${t.origText}{index:${t.index}}[${t.map(n ⇒ s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]"
).mkString("\n")
)
+ else
+ logger.trace(s"Invalid notes combination skipped: [${nsNotes.map(p ⇒ s"${p._1} → [${p._2.mkString(",")}]").mkString("|")}]")
+
res
}
@@ -351,7 +356,9 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
ns: NCNlpSentence,
history: Seq[(Int, Int)]
): Boolean = {
- ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
+ var ok = true
+
+ for (tok ← ns.filter(_.isTypeOf(noteType)) if ok)
tok.getNoteOpt(noteType, idxsField) match {
case Some(n) ⇒
val idxs: Seq[Seq[Int]] = n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
@@ -359,33 +366,34 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct) }
- if (idxs != fixed) {
- fixed.foreach(p ⇒ require(p.size == 1))
-
- // Fix double dimension array to one dimension.
+ if (fixed.forall(_.size == 1)) {
+ // Fix double dimension array to one dimension,
+ // so it should always be called, in spite of the fixIndexesReferences method.
n += idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable]
def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
logger.trace(s"`$noteType` note `indexes` fixed [old=${x(idxs)}}, new=${x(fixed)}]")
}
+ else
+ ok = false
case None ⇒ // No-op.
}
- )
- ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
- rel.dataOpt[java.util.List[Int]](idxsField) match {
- case Some(idxsList) ⇒
- val notesTypes = rel.data[util.List[String]](noteField)
+ ok &&
+ ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
+ rel.dataOpt[java.util.List[Int]](idxsField) match {
+ case Some(idxsList) ⇒
+ val notesTypes = rel.data[util.List[String]](noteField)
- require(idxsList.size() == notesTypes.size())
+ require(idxsList.size() == notesTypes.size())
- idxsList.asScala.zip(notesTypes.asScala).forall {
- case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel.id)
- }
- case None ⇒ true
- }
- )
+ idxsList.asScala.zip(notesTypes.asScala).forall {
+ case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel.id)
+ }
+ case None ⇒ true
+ }
+ )
}
/**
@@ -674,8 +682,6 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
toks.flatten.filter(!_.isNlp).distinct
- val notNlpTypes = getNotNlpNotes(ns).map(_.noteType).distinct
-
val delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(ns).
flatMap(note ⇒ getNotNlpNotes(ns.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
@@ -712,6 +718,8 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
deleted += delComb
+ val notNlpTypes = getNotNlpNotes(nsClone).map(_.noteType).distinct
+
if (collapse(nsClone, notNlpTypes, idCache)) Some(nsClone) else None
}
else
@@ -759,7 +767,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
m.values.map(_.sentence).toSeq
}
else {
- if (collapse(ns, notNlpTypes, idCache)) Seq(ns) else Seq.empty
+ if (collapse(ns, getNotNlpNotes(ns).map(_.noteType).distinct, idCache)) Seq(ns) else Seq.empty
}.distinct
sens.foreach(sen ⇒
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 436aafa..5f2253c 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -81,7 +81,10 @@ object NCSortEnricher extends NCProbeEnricher {
"SORT x BY ORDER"
)
- case class NoteData(note: String, indexes: Seq[Int])
+ case class NoteData(note: String, indexes: Seq[Int]) {
+ // Added for debug reasons.
+ override def toString: String = s"NoteData [note=$note, indexes=[${indexes.mkString(",")}]]"
+ }
private case class Match(
asc: Option[Boolean],
@@ -147,47 +150,60 @@ object NCSortEnricher extends NCProbeEnricher {
*
* @param toks
*/
- private def split(toks: Seq[NCNlpSentenceToken]): Seq[Seq[NoteData]] = {
+ private def split(toks: Seq[NCNlpSentenceToken], nullable: Boolean): Seq[Seq[NoteData]] = {
val all =
toks.flatten.
filter(!_.isNlp).map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
sortBy(_.indexes.head).distinct
- if (all.nonEmpty) {
- val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
+ val res =
+ if (all.nonEmpty) {
+ val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
- /**
- * Returns flag which indicates are token contiguous or not.
- *
- * @param tok1Idx First token index.
- * @param tok2Idx Second token index.
- */
- def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
- val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
+ /**
+ * Returns flag which indicates are token contiguous or not.
+ *
+ * @param tok1Idx First token index.
+ * @param tok2Idx Second token index.
+ */
+ def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
+ val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
- between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
- }
+ between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
+ }
+
+ val min = toks.dropWhile(_.isNlp).head.index
+ val max = toks.reverse.dropWhile(_.isNlp).head.index
+
+ require(min <= max)
+
+ def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
+ seq += nd
- def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
- seq += nd
+ all.
+ filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
+ foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
- all.
- filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
- foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
+ if (seq.nonEmpty && seq.head.indexes.head == min && seq.last.indexes.last == max)
+ res += seq
+ }
+
+ all.filter(_.indexes.head == min).foreach(p ⇒ fill(p))
- if (seq.nonEmpty &&
- seq.head.indexes.head == all.head.indexes.head &&
- seq.last.indexes.last == all.last.indexes.last
- )
- res += seq
+ res
}
+ else
+ Seq.empty
- fill(all.head)
+ if (res.isEmpty && !nullable)
+ throw new AssertionError(s"Invalid null result " +
+ s"[tokens=[${toks.map(_.origText).mkString(", ")}]" +
+ s", tokensIndexes=[${toks.map(_.index).mkString(", ")}]" +
+ s", allData=[${all.mkString(", ")}]" +
+ s"]"
+ )
- res
- }
- else
- Seq.empty
+ res
}
/**
@@ -292,18 +308,11 @@ object NCSortEnricher extends NCProbeEnricher {
require(subj.nonEmpty)
- Some(
- Match(
- asc = h.order match {
- case Some(order) ⇒ Some(ORDER(order.synonymIndex)._2)
- case None ⇒ None
- },
- main = h.sort.tokens,
- stop = h.byTokens ++ h.orderTokens,
- subjSeq = split(subj),
- bySeq = split(by)
- )
- )
+ val subjSeq = split(subj, nullable = false)
+ val bySeq = split(by, nullable = true)
+ val asc = h.order.flatMap(order ⇒ Some(ORDER(order.synonymIndex)._2))
+
+ Some(Match(asc, main = h.sort.tokens, stop = h.byTokens ++ h.orderTokens, subjSeq, bySeq))
}
else
None
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
new file mode 100644
index 0000000..c59174f
--- /dev/null
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
+
+import org.apache.nlpcraft.model.tools.test.{NCTestClient, NCTestClientBuilder}
+import org.apache.nlpcraft.probe.embedded.NCEmbeddedProbe
+
+import org.junit.jupiter.api.Assertions.{assertTrue, fail}
+import org.junit.jupiter.api.{AfterEach, BeforeEach}
+import org.scalatest.Assertions
+
+/**
+ * Enrichers tests utility base class.
+ */
+class NCEnricherBaseSpec {
+ private var client: NCTestClient = _
+
+ @BeforeEach
+ private[enrichers] def setUp(): Unit = {
+ NCEmbeddedProbe.start(classOf[NCEnricherTestModel])
+
+ client = new NCTestClientBuilder().newBuilder.setResponseLog(false).build
+
+ client.open(NCEnricherTestModel.ID)
+ }
+
+ @AfterEach
+ private[enrichers] def tearDown(): Unit = if (client != null) client.close() // `client` is still null if `setUp()` failed before assigning it.
+
+ /**
+ * Checks that at least one variant returned for the request equals the expected sentence.
+ *
+ * @param txt Request text sent to the test client.
+ * @param expToks Expected tokens of the required variant.
+ */
+ private[enrichers] def checkExists(txt: String, expToks: NCTestToken*): Unit = {
+ val res = client.ask(txt)
+
+ if (res.isFailed)
+ fail(s"Result failed [text=$txt, error=${res.getResultError.get()}]")
+
+ assertTrue(res.getResult.isPresent, s"Missed result data")
+
+ val sens = NCTestSentence.deserialize(res.getResult.get())
+ val expSen = NCTestSentence(expToks)
+
+ assertTrue(
+ sens.exists(_ == expSen),
+ s"Required sentence not found [request=$txt, \nexpected=\n$expSen, \nfound=\n${sens.mkString("\n")}\n]"
+ )
+ }
+
+ /**
+ * Checks multiple variants: the response must have as many sentences as expected and contain each expected one.
+ * Mismatches are reported as JUnit assertion failures, the same way `checkExists` reports them.
+ * @param txt Request text sent to the test client.
+ * @param expToks Expected variants, each given as its sequence of tokens.
+ */
+ private[enrichers] def checkAll(txt: String, expToks: Seq[NCTestToken]*): Unit = {
+ val res = client.ask(txt)
+
+ if (res.isFailed)
+ fail(s"Result failed [text=$txt, error=${res.getResultError.get()}]")
+
+ assertTrue(res.getResult.isPresent, s"Missed result data")
+
+ val expSens = expToks.map(NCTestSentence(_))
+ val sens = NCTestSentence.deserialize(res.getResult.get())
+
+ assertTrue(
+ expSens.size == sens.size,
+ s"Unexpected response size [request=$txt, expected=${expSens.size}, received=${sens.size}]"
+ )
+
+ for (expSen ← expSens)
+ assertTrue(
+ sens.exists(_ == expSen),
+ s"Required sentence not found [request=$txt, \nexpected=\n$expSen, \nfound=\n${sens.mkString("\n")}\n]"
+ )
+ }
+
+ /**
+ * Runs every given test, prints each failure with its 1-based position and fails once at the end if any test failed.
+ * @param tests Test bodies to run; each one is invoked with the unit value.
+ */
+ private[enrichers] def runBatch(tests: Unit ⇒ Unit*): Unit = {
+ val errs = tests.zipWithIndex.flatMap { case (test, i) ⇒
+ try {
+ test.apply(())
+
+ None
+ }
+ catch {
+ case e: Throwable ⇒ Some(e, i)
+ }
+ }
+
+ if (errs.nonEmpty) {
+ errs.foreach { case (err, i) ⇒
+ System.err.println(s"${i + 1}. Test failed: ${err.getLocalizedMessage}")
+
+ err.printStackTrace()
+ }
+
+ Assertions.fail(s"Failed ${errs.size} tests. See errors list above.")
+ }
+ else
+ println(s"All tests passed: ${tests.size}")
+ }
+}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala
new file mode 100644
index 0000000..8023bdc
--- /dev/null
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherTestModel.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
+
+import java.util
+import java.util.Collections
+
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCModelAdapter, NCResult}
+
+import scala.collection.JavaConverters._
+import scala.language.implicitConversions
+
+import NCEnricherTestModel._
+
+/**
+ * Enrichers test model.
+ */
+class NCEnricherTestModel extends NCModelAdapter(ID, "Model enrichers test", "1.0") {
+ private implicit def convert(s: String): NCResult = NCResult.text(s)
+
+ override def getElements: util.Set[NCElement] =
+ Set(
+ mkElement("A", "A"),
+ mkElement("B", "B"),
+ mkElement("C", "C"),
+ mkElement("AB", "A B"),
+ mkElement("BC", "B C"),
+ mkElement("ABC", "A B C"),
+ mkElement("D1", "D"),
+ mkElement("D2", "D")
+ ).asJava
+
+ private def mkElement(id: String, syns: String*): NCElement =
+ new NCElement {
+ override def getId: String = id
+ override def getSynonyms: util.List[String] = syns.asJava
+ override def getGroups: util.List[String] = Collections.singletonList(GROUP)
+ }
+
+ override def onContext(ctx: NCContext): NCResult =
+ NCResult.text(
+ NCTestSentence.serialize(ctx.getVariants.asScala.map(v ⇒ NCTestSentence(v.asScala.map(NCTestToken(_)))))
+ )
+}
+
+object NCEnricherTestModel {
+ final val ID = "test.enricher"
+
+ private final val GROUP = "test-enricher-group"
+}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala
new file mode 100644
index 0000000..f97bc40
--- /dev/null
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala
@@ -0,0 +1,378 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
+
+import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
+import java.nio.charset.StandardCharsets.UTF_8
+import java.util.{Base64, Optional}
+
+import org.apache.nlpcraft.model.NCToken
+import resource.managed
+
+import scala.compat.java8.OptionConverters._
+import scala.collection.JavaConverters._
+
+/**
+ * Tests infrastructure beans.
+ */
+
+sealed trait NCTestToken {
+ def id: String
+ def text: String
+ def isStop: Boolean = false
+}
+
+// Simplified set of tokens data. Added only fields for validation.
+
+// Server enrichers.
+case class NCTestNlpToken(text: String, override val isStop: Boolean = false) extends NCTestToken {
+ require(text != null)
+
+ override def id: String = "nlpcraft:nlp"
+ override def toString: String = s"$text(nlp)<isStop=$isStop>"
+}
+
+ // Skip verification of non-deterministic properties.
+case class NCTestDateToken(text: String) extends NCTestToken {
+ require(text != null)
+
+ override def id: String = "nlpcraft:date"
+ override def toString: String = s"$text(date)"
+}
+
+ case class NCTestCoordinateToken(text: String, latitude: Double, longitude: Double) extends NCTestToken {
+ require(text != null)
+
+ override def id: String = "nlpcraft:coordinate"
+ override def toString: String = s"$text(coordinate)<lon=$longitude, lat=$latitude>" // `lat` prints the latitude field.
+ }
+
+case class NCTestNumericToken(text: String, from: Double, to: Double) extends NCTestToken {
+ require(text != null)
+
+ override def id: String = "nlpcraft:num"
+ override def toString: String = s"$text(num)<from=$from, to=$to>"
+}
+
+case class NCTestCityToken(text: String, city: String) extends NCTestToken {
+ require(text != null)
+ require(city != null)
+
+ override def id: String = "nlpcraft:city"
+ override def toString: String = s"$text(city)[city=$city]"
+}
+
+case class NCTestCountryToken(text: String, country: String) extends NCTestToken {
+ require(text != null)
+ require(country != null)
+
+ override def id: String = "nlpcraft:country"
+ override def toString: String = s"$text(country)<country=$country>"
+}
+
+case class NCTestRegionToken(text: String, region: String) extends NCTestToken {
+ require(text != null)
+ require(region != null)
+
+ override def id: String = "nlpcraft:region"
+ override def toString: String = s"$text(region)<region=$region>"
+}
+
+case class NCTestContinentToken(text: String, continent: String) extends NCTestToken {
+ require(text != null)
+ require(continent != null)
+
+ override def id: String = "nlpcraft:continent"
+ override def toString: String = s"$text(continent)<continent=$continent>"
+}
+
+case class NCTestSubcontinentToken(text: String, subcontinent: String) extends NCTestToken {
+ require(text != null)
+ require(subcontinent != null)
+
+ override def id: String = "nlpcraft:subcontinent"
+ override def toString: String = s"$text(subcontinent)<subcontinent=$subcontinent>"
+}
+
+case class NCTestMetroToken(text: String, metro: String) extends NCTestToken {
+ require(text != null)
+ require(metro != null)
+
+ override def id: String = "nlpcraft:metro"
+ override def toString: String = s"$text(metro)<metro=$metro>"
+}
+
+// Probe enrichers.
+ case class NCTestSortToken(
+ text: String,
+ subjNotes: Seq[String],
+ subjIndexes: Seq[Int],
+ byNotes: Option[Seq[String]] = None,
+ byIndexes: Option[Seq[Int]] = None,
+ asc: Option[Boolean] = None
+ ) extends NCTestToken {
+ require(text != null)
+ require(subjNotes != null)
+ require(subjNotes.nonEmpty)
+ require(subjIndexes != null)
+ require(subjIndexes.nonEmpty)
+ require(byNotes != null)
+ require(byNotes.isEmpty || byNotes.get.nonEmpty)
+ require(byIndexes != null)
+ require(byIndexes.isEmpty || byIndexes.get.nonEmpty)
+ require(asc != null)
+
+ override def id: String = "nlpcraft:sort"
+
+ // Renders `text(sort)<subjNotes=[..], subjIndexes=[..]>`, appending `byNotes`, `byIndexes` and `asc` only when defined.
+ override def toString: String = {
+ // The constructor does not require `byNotes` and `byIndexes` to be defined together,
+ // so each optional part is rendered on its own and nothing is unwrapped unconditionally.
+ val subjPart =
+ s"<subjNotes=[${subjNotes.mkString(",")}]" +
+ s", subjIndexes=[${subjIndexes.mkString(",")}]"
+
+ val byPart =
+ byNotes.fold("")(p ⇒ s", byNotes=[${p.mkString(",")}]") +
+ byIndexes.fold("")(p ⇒ s", byIndexes=[${p.mkString(",")}]")
+
+ val ascPart = asc.fold("")(p ⇒ s", asc=$p")
+
+ s"$text(sort)$subjPart$byPart$ascPart>"
+ }
+ }
+
+object NCTestSortToken {
+ def apply(
+ text: String,
+ subjNotes: Seq[String],
+ subjIndexes: Seq[Int],
+ asc: Boolean
+ ): NCTestSortToken = new NCTestSortToken(text, subjNotes, subjIndexes, None, None, Some(asc))
+
+ def apply(
+ text: String,
+ subjNote: String,
+ subjIndex: Int,
+ asc: Boolean
+ ): NCTestSortToken = new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), None, None, Some(asc))
+
+ def apply(
+ text: String,
+ subjNote: String,
+ subjIndex: Int
+ ): NCTestSortToken = new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), None, None, None)
+
+ def apply(
+ text: String,
+ subjNotes: Seq[String],
+ subjIndexes: Seq[Int],
+ byNotes: Seq[String],
+ byIndexes: Seq[Int]
+ ): NCTestSortToken = new NCTestSortToken(text, subjNotes, subjIndexes, Some(byNotes), Some(byIndexes), None)
+
+ def apply(
+ text: String,
+ subjNote: String,
+ subjIndex: Int,
+ byNote: String,
+ byIndex: Int): NCTestSortToken =
+ new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), Some(Seq(byNote)), Some(Seq(byIndex)), None)
+
+ def apply(
+ text: String,
+ subjNote: String,
+ subjIndex: Int,
+ byNote: String,
+ byIndex: Int,
+ asc: Boolean): NCTestSortToken =
+ new NCTestSortToken(text, Seq(subjNote), Seq(subjIndex), Some(Seq(byNote)), Some(Seq(byIndex)), Some(asc))
+}
+
+case class NCTestRelationToken(text: String, `type`: String, indexes: Seq[Int], note: String) extends NCTestToken {
+ require(text != null)
+ require(`type` != null)
+ require(indexes != null)
+ require(indexes.nonEmpty)
+ require(note != null)
+
+ override def id: String = "nlpcraft:relation"
+ override def toString: String =
+ s"$text(relation)" +
+ s"<type=${`type`}" +
+ s", indexes=[${indexes.mkString(",")}]" +
+ s", note=$note>"
+}
+
+case class NCTestAggregationToken(text: String, `type`: String, indexes: Seq[Int], note: String) extends NCTestToken {
+ require(text != null)
+ require(`type` != null)
+ require(indexes != null)
+ require(indexes.nonEmpty)
+ require(note != null)
+
+ override def id: String = "nlpcraft:aggregation"
+ override def toString: String =
+ s"$text(aggregation)" +
+ s"<type=${`type`}" +
+ s", indexes=[${indexes.mkString(",")}]" +
+ s", note=$note>"
+}
+
+case class NCTestLimitToken(
+ text: String,
+ limit: Double,
+ indexes: Seq[Int],
+ note: String,
+ asc: Option[Boolean] = None
+) extends NCTestToken {
+ require(text != null)
+ require(indexes != null)
+ require(indexes.nonEmpty)
+ require(note != null)
+ require(asc != null)
+
+ override def id: String = "nlpcraft:limit"
+ override def toString: String = {
+ var s = s"$text(limit)" +
+ s"<limit=$limit" +
+ s", indexes=[${indexes.mkString(",")}]" +
+ s", note=$note"
+
+ if (asc.isDefined)
+ s = s"$s, asc=${asc.get}"
+
+ s = s"$s>"
+
+ s
+ }
+}
+
+case class NCTestUserToken(text: String, id: String) extends NCTestToken {
+ require(text != null)
+ require(id != null)
+ override def toString: String = s"$text(user)<id=$id>"}
+
+// Token and sentence beans and utilities.
+
+ object NCTestToken {
+ // Converts a model token into its simplified test bean, dispatching on the token ID.
+ def apply(t: NCToken): NCTestToken = {
+ val txt = t.getOriginalText
+ val id = t.getId
+
+ id match {
+ case "nlpcraft:nlp" ⇒ NCTestNlpToken(txt, t.isStopWord)
+ case "nlpcraft:coordinate" ⇒
+ NCTestCoordinateToken(
+ txt,
+ latitude = t.meta("nlpcraft:coordinate:latitude"),
+ longitude = t.meta("nlpcraft:coordinate:longitude")
+ )
+ case "nlpcraft:num" ⇒ NCTestNumericToken(
+ txt,
+ from = t.meta("nlpcraft:num:from"),
+ to = t.meta("nlpcraft:num:to")
+ )
+ case "nlpcraft:date" ⇒ NCTestDateToken(txt)
+ case "nlpcraft:city" ⇒ NCTestCityToken(txt, city = t.meta("nlpcraft:city:city"))
+ case "nlpcraft:region" ⇒ NCTestRegionToken(txt, region = t.meta("nlpcraft:region:region"))
+ case "nlpcraft:country" ⇒ NCTestCountryToken(txt, country = t.meta("nlpcraft:country:country"))
+ case "nlpcraft:subcontinent" ⇒
+ NCTestSubcontinentToken(txt, subcontinent = t.meta("nlpcraft:subcontinent:subcontinent"))
+ case "nlpcraft:continent" ⇒
+ NCTestContinentToken(txt, continent = t.meta("nlpcraft:continent:continent"))
+ case "nlpcraft:metro" ⇒ NCTestMetroToken(txt, metro = t.meta("nlpcraft:metro:metro"))
+ case "nlpcraft:sort" ⇒
+ val subjNotes: java.util.List[String] = t.meta("nlpcraft:sort:subjnotes")
+ val subjIndexes: java.util.List[Int] = t.meta("nlpcraft:sort:subjindexes")
+ val byNotes: Optional[java.util.List[String]] = t.metaOpt("nlpcraft:sort:bynotes")
+ val byIndexes: Optional[java.util.List[Int]] = t.metaOpt("nlpcraft:sort:byindexes")
+ val asc: Optional[Boolean] = t.metaOpt("nlpcraft:sort:asc")
+
+ // Converts an optional Java list into an optional Scala sequence.
+ def toOpt[T](lOpt: Optional[java.util.List[T]]): Option[Seq[T]] =
+ lOpt.asScala.map(_.asScala)
+
+ NCTestSortToken(txt, subjNotes.asScala, subjIndexes.asScala, toOpt(byNotes), toOpt(byIndexes), asc.asScala)
+ case "nlpcraft:relation" ⇒
+ val indexes: java.util.List[Int] = t.meta("nlpcraft:relation:indexes")
+
+ NCTestRelationToken(
+ txt,
+ `type` = t.meta("nlpcraft:relation:type"),
+ indexes = indexes.asScala,
+ note = t.meta("nlpcraft:relation:note")
+ )
+ case "nlpcraft:aggregation" ⇒
+ val indexes: java.util.List[Int] = t.meta("nlpcraft:aggregation:indexes")
+
+ NCTestAggregationToken(
+ txt,
+ `type` = t.meta("nlpcraft:aggregation:type"),
+ indexes = indexes.asScala,
+ note = t.meta("nlpcraft:aggregation:note")
+ )
+
+ case "nlpcraft:limit" ⇒
+ val indexes: java.util.List[Int] = t.meta("nlpcraft:limit:indexes")
+ // `asc` is read with the same `nlpcraft:limit:` prefix as the other limit properties.
+ val asc: Optional[Boolean] = t.metaOpt("nlpcraft:limit:asc")
+
+ NCTestLimitToken(
+ txt,
+ limit = t.meta("nlpcraft:limit:limit"),
+ indexes = indexes.asScala,
+ note = t.meta("nlpcraft:limit:note"),
+ asc.asScala
+ )
+
+ case _ ⇒
+ if (t.isUserDefined)
+ NCTestUserToken(txt, id)
+ else
+ throw new AssertionError(s"Unsupported token: $id")
+ }
+ }
+
+ def apply(text: String, isStop: Boolean): NCTestToken = NCTestNlpToken(text, isStop)
+ }
+
+case class NCTestSentence(tokens: Seq[NCTestToken]) {
+ override def toString = s"Sentence: ${tokens.mkString("|")}"
+}
+
+object NCTestSentence {
+ def serialize(sens: Iterable[NCTestSentence]): String =
+ managed(new ByteArrayOutputStream()) acquireAndGet { bos ⇒
+ managed(new ObjectOutputStream(bos)) acquireAndGet { os ⇒
+ os.writeObject(sens)
+
+ os.flush()
+
+ new String(Base64.getEncoder.encode(bos.toByteArray), UTF_8)
+ }
+ }
+
+ def deserialize(s: String): Iterable[NCTestSentence] =
+ managed(new ObjectInputStream(
+ new ByteArrayInputStream(Base64.getDecoder.decode(s.getBytes(UTF_8))))
+ ) acquireAndGet { is ⇒
+ is.readObject.asInstanceOf[Iterable[NCTestSentence]]
+ }
+}
\ No newline at end of file
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
new file mode 100644
index 0000000..7795bbc
--- /dev/null
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCEnricherLimitSpec.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit
+
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestLimitToken ⇒ lim, NCTestUserToken ⇒ usr}
+import org.junit.jupiter.api.Test
+
+/**
+ * Limit enricher test.
+ */
+class NCEnricherLimitSpec extends NCEnricherBaseSpec {
+ /**
+ * Checks that `top 5 A` produces a variant made of a limit token followed by the user token `A`.
+ * @throws Exception
+ */
+ @Test
+ def test(): Unit = {
+ checkExists(
+ "top 5 A",
+ lim(text = "top 5", limit = 5, indexes = Seq(1), note = "A", asc = Some(true)),
+ usr(text = "A", id = "A")
+ )
+ }
+}
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
new file mode 100644
index 0000000..a74c835
--- /dev/null
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
+
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCEnricherBaseSpec, NCTestNlpToken ⇒ nlp, NCTestSortToken ⇒ srt, NCTestUserToken ⇒ usr}
+import org.junit.jupiter.api.Test
+
+/**
+ * Sort enricher test.
+ */
+class NCEnricherSortSpec extends NCEnricherBaseSpec {
+ /**
+ * Runs the batch of sort requests and checks the expected tokens of every one.
+ * @throws Exception
+ */
+ @Test
+ def test(): Unit =
+ runBatch(
+ _ ⇒ checkExists(
+ "sort A",
+ srt(text = "sort", subjNote = "A", subjIndex = 1),
+ usr("A", "A")
+ ),
+ _ ⇒ checkExists(
+ "sort A by A",
+ srt(text = "sort", subjNote = "A", subjIndex = 1, byNote = "A", byIndex = 3),
+ usr(text = "A", id = "A"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A")
+ ),
+ _ ⇒ checkExists(
+ "sort A, C by A, C",
+ srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 3), byNotes = Seq("A", "C"), byIndexes = Seq(5, 7)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "C", id = "C"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "C", id = "C")
+ ),
+ _ ⇒ checkExists(
+ "sort A C by A C",
+ srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "C"), byIndexes = Seq(4, 5)),
+ usr(text = "A", id = "A"),
+ usr(text = "C", id = "C"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ usr(text = "C", id = "C")
+ ),
+ _ ⇒ checkExists(
+ "sort A B by A B",
+ srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B")
+ ),
+ _ ⇒ checkExists(
+ "sort A B by A B",
+ srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote = "AB", byIndex = 3),
+ usr(text = "A B", id = "AB"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A B", id = "AB")
+ ),
+ _ ⇒ checkExists(
+ "A classify",
+ usr(text = "A", id = "A"),
+ srt(text = "classify", subjNote = "A", subjIndex = 0)
+ ),
+ _ ⇒ checkExists(
+ "the A the classify",
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the", isStop = true),
+ srt(text = "classify", subjNote = "A", subjIndex = 1)
+ ),
+ _ ⇒ checkExists(
+ "segment A by top down",
+ srt(text = "segment", subjNote = "A", subjIndex = 1, asc = false),
+ usr(text = "A", id = "A"),
+ nlp(text = "by top down", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "segment A by in bottom up order",
+ srt(text = "segment", subjNote = "A", subjIndex = 1, asc = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "by in bottom up order", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "the segment the A the by in bottom up the order the",
+ nlp(text = "the", isStop = true),
+ srt(text = "segment", subjNote = "A", subjIndex = 3, asc = true),
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the by in bottom up the order the", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "A classify",
+ usr(text = "A", id = "A"),
+ srt(text = "classify", subjNote = "A", subjIndex = 0)
+ ),
+ _ ⇒ checkAll(
+ "A B classify",
+ Seq(
+ usr(text = "A B", id = "AB"),
+ srt(text = "classify", subjNote = "AB", subjIndex = 0)
+ ),
+ Seq(
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B"),
+ srt(text = "classify", subjNotes = Seq("A", "B"), subjIndexes = Seq(0, 1))
+ )
+ ),
+ _ ⇒ checkAll(
+ "D classify",
+ Seq(
+ usr(text = "D", id = "D1"),
+ srt(text = "classify", subjNote = "D1", subjIndex = 0)
+ ),
+ Seq(
+ usr(text = "D", id = "D2"),
+ srt(text = "classify", subjNote = "D2", subjIndex = 0)
+ )
+ )
+ )
+}