You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/03/12 11:55:28 UTC
[incubator-nlpcraft] 02/03: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-2
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit b53b17cd9aa9adc2376f0f0b7a87173caa31893d
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Thu Mar 12 12:36:33 2020 +0300
WIP.
---
.../org/nlpcraft/model/impl/NCTokenLogger.scala | 24 +++-
.../nlp/enrichers/post/NCPostEnrichProcessor.scala | 145 +++++++++++++++------
2 files changed, 124 insertions(+), 45 deletions(-)
diff --git a/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala b/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
index e6b603f..2968839 100644
--- a/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
@@ -465,11 +465,29 @@ object NCTokenLogger extends LazyLogging {
s"type=$t, indexes=[$getIndexes], note=$note"
case "nlpcraft:sort" ⇒
- val asc: Boolean = get("asc")
- val note = mkString("note")
+ def n2s(l: java.util.List[String]): String = l.asScala.mkString(", ")
+ def i2s(l: java.util.List[java.util.List[Int]]): String =
+ l.asScala.map(_.asScala).map(p ⇒ s"[${p.mkString(", ")}]").mkString(", ")
+
+ val subjNotes: java.util.List[String] = get("subjnotes")
+ val subjIndexes: java.util.List[java.util.List[Int]] = get("subjindexes")
+
+ var s = s"subjNotes=${n2s(subjNotes)}, subjIndexes=[${i2s(subjIndexes)}]"
+
+ if (has("asc"))
+ s = s"$s, asc=${get("asc")}"
- s"asc=$asc, indexes=[$getIndexes], note=$note"
+ val byNotesOpt: Option[java.util.List[String]] = getOpt("bynotes")
+ byNotesOpt match {
+ case Some(byNotes) ⇒
+ val byIndexes: java.util.List[java.util.List[Int]] = get("byindexes")
+
+ s = s"$s, byNotes=${n2s(byNotes)}, byIndexes=[${i2s(byIndexes)}]"
+ case None ⇒ // No-op.
+ }
+
+ s
case "nlpcraft:limit" ⇒
val limit = mkDouble3("limit")
val note = mkString("note")
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 6d71975..b41ce7b 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -231,9 +231,10 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
Seq(
"nlpcraft:aggregation",
"nlpcraft:relation",
- "nlpcraft:sort",
"nlpcraft:limit"
- ).forall(t ⇒ fixIndexesReferences(t, ns, history))
+ ).forall(t ⇒ fixIndexesReferences(t, ns, history) &&
+ fixIndexesReferencesList("nlpcraft:limit", "subjIndexes", "subjNotes", ns, history) &&
+ fixIndexesReferencesList("nlpcraft:limit", "byIndexes", "byNotes", ns, history))
if (res)
// Validation (all indexes calculated well)
@@ -251,7 +252,57 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
}
/**
+ *
+ * @param ns
+ * @param idxs
+ * @param notesType
+ * @param id
+ * @return
+ */
+ private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, id: String): Boolean = {
+ val types =
+ idxs.flatMap(idx ⇒ {
+ val types = ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)
+
+ types.size match {
+ case 0 ⇒ None
+ case 1 ⇒ Some(types.head)
+ case _ ⇒ throw new AssertionError(s"Unexpected tokes: ${ns(idx)}")
+ }
+ }).distinct
+
+ /**
+ Example:
+ 1. Sentence 'maximum x' (single element related function)
+ - maximum is aggregate function linked to date element.
+ - x defined as 2 elements: date and num.
+ So, the variant 'maximum x (as num)' should be excluded.
+
+ 2. Sentence 'compare x and y' (multiple elements related function)
+ - compare is relation function linked to date element.
+ - x and y defined as 2 elements: date and num.
+ So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
+ shouldn't be excluded, but the invalid relation should be deleted for these combinations.
+ */
+
+ types.size match {
+ case 0 ⇒ throw new AssertionError("Unexpected empty types")
+ case 1 ⇒ types.head == notesType
+ case _ ⇒
+ // Equal elements should be processed together with function element.
+ if (types.size == 1)
+ false
+ else {
+ ns.removeNote(id)
+
+ true
+ }
+ }
+ }
+
+ /**
* Fixes notes with references to other notes indexes.
+ * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kinds of references.
*
* @param noteType Note type.
* @param ns Sentence.
@@ -262,7 +313,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
tok.getNoteOpt(noteType, "indexes") match {
case Some(n) ⇒
- val idxs = n.data[java.util.List[Int]]("indexes").asScala
+ val idxs: Seq[Int] = n.data[java.util.List[Int]]("indexes").asScala
var fixed = idxs
history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }
@@ -280,48 +331,58 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
}
)
- ns.flatMap(_.getNotes(noteType)).forall(p = rel ⇒ {
- val idxs = rel.data[java.util.List[Int]]("indexes")
- val notesType = rel.data[String]("note")
+ ns.flatMap(_.getNotes(noteType)).forall(
+ n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String]("note"), n.id)
+ )
+ }
+
+ /**
+ * Fixes notes with references list to other notes indexes.
+ *
+ * @param noteType Note type.
+ * @param idxsField Indexes field.
+ * @param noteField Note field.
+ * @param ns Sentence.
+ * @param history Indexes transformation history.
+ * @return Valid flag.
+ */
+ private def fixIndexesReferencesList(
+ noteType: String,
+ idxsField: String,
+ noteField: String,
+ ns: NCNlpSentence,
+ history: Seq[(Int, Int)]
+ ): Boolean = {
+ ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
+ tok.getNoteOpt(noteType, idxsField) match {
+ case Some(n) ⇒
+ val idxs: Seq[Seq[Int]] = n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
+ var fixed = idxs
+
+ history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i)) }
- val types =
- idxs.asScala.flatMap(idx ⇒ {
- val types = ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)
+ fixed = fixed.distinct
- types.size match {
- case 0 ⇒ None
- case 1 ⇒ Some(types.head)
- case _ ⇒ throw new AssertionError(s"Unexpected tokes: ${ns(idx)}")
- }
- }).distinct
-
- /**
- Example:
- 1. Sentence 'maximum x' (single element related function)
- - maximum is aggregate function linked to date element.
- - x defined as 2 elements: date and num.
- So, the variant 'maximum x (as num)' should be excluded.
-
- 2. Sentence 'compare x and y' (multiple elements related function)
- - compare is relation function linked to date element.
- - x an y defined as 2 elements: date and num.
- So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
- should't be excluded, but invalid relation should be deleted for these combinations.
- */
-
- types.size match {
- case 0 ⇒ throw new AssertionError("Unexpected empty types")
- case 1 ⇒ types.head == notesType
- case _ ⇒
- // Equal elements should be processed together with function element.
- if (types.size == 1)
- false
- else {
- ns.removeNote(rel.id)
-
- true
- }
+ if (idxs != fixed) {
+ n += idxsField → fixed.map(_.asJava).asJava.asInstanceOf[java.io.Serializable]
+
+ def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
+
+ logger.trace(s"`$noteType` note `indexes` fixed [old=${x(idxs)}}, new=${x(fixed)}]")
}
+ case None ⇒ // No-op.
+ }
+ )
+
+ ns.flatMap(_.getNotes(noteType)).forall(rel ⇒ {
+ val idxsList: util.List[util.List[Int]] = rel.data[java.util.List[java.util.List[Int]]](idxsField)
+ val notesTypes = rel.data[util.List[String]](noteField)
+
+ require(idxsList.size() == notesTypes.size())
+
+ idxsList.asScala.zip(notesTypes.asScala).forall {
+ case (idxs, notesType) ⇒ checkRelation(ns, idxs.asScala, notesType, rel.id)
+ }
})
}