You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/03/14 07:00:34 UTC

[incubator-nlpcraft] 02/13: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit b53b17cd9aa9adc2376f0f0b7a87173caa31893d
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Thu Mar 12 12:36:33 2020 +0300

    WIP.
---
 .../org/nlpcraft/model/impl/NCTokenLogger.scala    |  24 +++-
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 145 +++++++++++++++------
 2 files changed, 124 insertions(+), 45 deletions(-)

diff --git a/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala b/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
index e6b603f..2968839 100644
--- a/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/src/main/scala/org/nlpcraft/model/impl/NCTokenLogger.scala
@@ -465,11 +465,29 @@ object NCTokenLogger extends LazyLogging {
                             s"type=$t, indexes=[$getIndexes], note=$note"
 
                         case "nlpcraft:sort" ⇒
-                            val asc: Boolean = get("asc")
-                            val note = mkString("note")
+                            def n2s(l: java.util.List[String]): String = l.asScala.mkString(", ")
+                            def i2s(l: java.util.List[java.util.List[Int]]): String =
+                                l.asScala.map(_.asScala).map(p ⇒ s"[${p.mkString(", ")}]").mkString(", ")
+
+                            val subjNotes: java.util.List[String] = get("subjnotes")
+                            val subjIndexes: java.util.List[java.util.List[Int]] = get("subjindexes")
+
+                            var s = s"subjNotes=${n2s(subjNotes)}, subjIndexes=[${i2s(subjIndexes)}]"
+
+                            if (has("asc"))
+                                s = s"$s, asc=${get("asc")}"
 
-                            s"asc=$asc, indexes=[$getIndexes], note=$note"
+                            val byNotesOpt: Option[java.util.List[String]] = getOpt("bynotes")
 
+                            byNotesOpt match {
+                                case Some(byNotes) ⇒
+                                    val byIndexes: java.util.List[java.util.List[Int]] = get("byindexes")
+
+                                    s = s"$s, byNotes=${n2s(byNotes)}, byIndexes=[${i2s(byIndexes)}]"
+                                case None ⇒ // No-op.
+                            }
+
+                            s
                         case "nlpcraft:limit" ⇒
                             val limit = mkDouble3("limit")
                             val note = mkString("note")
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 6d71975..b41ce7b 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -231,9 +231,10 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             Seq(
                 "nlpcraft:aggregation",
                 "nlpcraft:relation",
-                "nlpcraft:sort",
                 "nlpcraft:limit"
-            ).forall(t ⇒ fixIndexesReferences(t, ns, history))
+            ).forall(t ⇒ fixIndexesReferences(t, ns, history) &&
+            fixIndexesReferencesList("nlpcraft:limit", "subjIndexes", "subjNotes", ns, history) &&
+            fixIndexesReferencesList("nlpcraft:limit", "byIndexes", "byNotes", ns, history))
 
         if (res)
             // Validation (all indexes calculated well)
@@ -251,7 +252,57 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
     }
 
     /**
+      *
+      * @param ns
+      * @param idxs
+      * @param notesType
+      * @param id
+      * @return
+      */
+    private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, id: String): Boolean = {
+        val types =
+            idxs.flatMap(idx ⇒ {
+                val types = ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)
+
+                types.size match {
+                    case 0 ⇒ None
+                    case 1 ⇒ Some(types.head)
+                    case _ ⇒ throw new AssertionError(s"Unexpected tokes: ${ns(idx)}")
+                }
+            }).distinct
+
+        /**
+        Example:
+             1. Sentence 'maximum x' (single element related function)
+              - maximum is aggregate function linked to date element.
+              - x defined as 2 elements: date and num.
+              So, the variant 'maximum x (as num)' should be excluded.
+
+              2. Sentence 'compare x and y' (multiple elements related function)
+              - compare is relation function linked to date element.
+              - x and y are defined as 2 elements: date and num.
+              So, variants 'x (as num) and x (as date)'  and 'x (as date) and x (as num)'
+              shouldn't be excluded, but invalid relation should be deleted for these combinations.
+          */
+
+        types.size match {
+            case 0 ⇒ throw new AssertionError("Unexpected empty types")
+            case 1 ⇒ types.head == notesType
+            case _ ⇒
+                // Equal elements should be processed together with function element.
+                if (types.size == 1)
+                    false
+                else {
+                    ns.removeNote(id)
+
+                    true
+                }
+        }
+    }
+
+    /**
       * Fixes notes with references to other notes indexes.
+      * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kinds of references.
       *
       * @param noteType Note type.
       * @param ns Sentence.
@@ -262,7 +313,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
         ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
             tok.getNoteOpt(noteType, "indexes") match {
                 case Some(n) ⇒
-                    val idxs = n.data[java.util.List[Int]]("indexes").asScala
+                    val idxs: Seq[Int] = n.data[java.util.List[Int]]("indexes").asScala
                     var fixed = idxs
 
                     history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }
@@ -280,48 +331,58 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             }
         )
 
-        ns.flatMap(_.getNotes(noteType)).forall(p = rel ⇒ {
-            val idxs = rel.data[java.util.List[Int]]("indexes")
-            val notesType = rel.data[String]("note")
+        ns.flatMap(_.getNotes(noteType)).forall(
+            n ⇒ checkRelation(ns, n.data[java.util.List[Int]]("indexes").asScala, n.data[String]("note"), n.id)
+        )
+    }
+
+    /**
+      * Fixes notes with references list to other notes indexes.
+      *
+      * @param noteType Note type.
+      * @param idxsField Indexes field.
+      * @param noteField Note field.
+      * @param ns Sentence.
+      * @param history Indexes transformation history.
+      * @return Valid flag.
+      */
+    private def fixIndexesReferencesList(
+        noteType: String,
+        idxsField: String,
+        noteField: String,
+        ns: NCNlpSentence,
+        history: Seq[(Int, Int)]
+    ): Boolean = {
+        ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
+            tok.getNoteOpt(noteType, idxsField) match {
+                case Some(n) ⇒
+                    val idxs: Seq[Seq[Int]] = n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
+                    var fixed = idxs
+
+                    history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i)) }
 
-                    val types =
-                        idxs.asScala.flatMap(idx ⇒ {
-                            val types = ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)
+                    fixed = fixed.distinct
 
-                            types.size match {
-                                case 0 ⇒ None
-                                case 1 ⇒ Some(types.head)
-                                case _ ⇒ throw new AssertionError(s"Unexpected tokes: ${ns(idx)}")
-                            }
-                        }).distinct
-
-                    /**
-                    Example:
-                     1. Sentence 'maximum x' (single element related function)
-                      - maximum is aggregate function linked to date element.
-                      - x defined as 2 elements: date and num.
-                      So, the variant 'maximum x (as num)' should be excluded.
-
-                      2. Sentence 'compare x and y' (multiple elements related function)
-                      - compare is relation function linked to date element.
-                      - x an y defined as 2 elements: date and num.
-                      So, variants 'x (as num) and x (as date)'  and 'x (as date) and x (as num)'
-                      should't be excluded, but invalid relation should be deleted for these combinations.
-                      */
-
-                    types.size match {
-                        case 0 ⇒ throw new AssertionError("Unexpected empty types")
-                        case 1 ⇒ types.head == notesType
-                        case _ ⇒
-                            // Equal elements should be processed together with function element.
-                            if (types.size == 1)
-                                false
-                            else {
-                                ns.removeNote(rel.id)
-
-                                true
-                            }
+                    if (idxs != fixed) {
+                        n += idxsField → fixed.map(_.asJava).asJava.asInstanceOf[java.io.Serializable]
+
+                        def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
+
+                        logger.trace(s"`$noteType` note `indexes` fixed [old=${x(idxs)}}, new=${x(fixed)}]")
                     }
+                case None ⇒ // No-op.
+            }
+        )
+
+        ns.flatMap(_.getNotes(noteType)).forall(rel ⇒ {
+            val idxsList: util.List[util.List[Int]] = rel.data[java.util.List[java.util.List[Int]]](idxsField)
+            val notesTypes = rel.data[util.List[String]](noteField)
+
+            require(idxsList.size() == notesTypes.size())
+
+            idxsList.asScala.zip(notesTypes.asScala).forall {
+                case (idxs, notesType) ⇒ checkRelation(ns, idxs.asScala, notesType, rel.id)
+            }
         })
     }