You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/03/17 13:49:58 UTC

[incubator-nlpcraft] branch NLPCRAFT-16 updated (9edb5dd -> ecef2c6)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-16
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 9edb5dd  WIP.
     new 7a90b18  WIP.
     new ecef2c6  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 58 ++++++++--------
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 78 +++++++++++++---------
 .../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala    | 18 +++--
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    | 44 ++++++++----
 4 files changed, 122 insertions(+), 76 deletions(-)


[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-16
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 7a90b18c533211f325b668da57ad7bb1f59e5889
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Mar 17 12:37:36 2020 +0300

    WIP.
---
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala |  8 ++-
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   | 78 +++++++++++++---------
 .../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala    | 16 +++--
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    | 24 +++----
 4 files changed, 74 insertions(+), 52 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 176e4a6..7a77713 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -132,9 +132,11 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                     (
                         p.wordIndexes,
                         p.noteType,
-                        p.get("asc"),
-                        p.get("indexes"),
-                        p.get("note")
+                        p.get("subjNotes"),
+                        p.get("subjIndexes"),
+                        p.getOrElse("byNotes", null),
+                        p.getOrElse("byIndexes", null),
+                        p.getOrElse("asc", null)
                     )
                 case "nlpcraft:limit" ⇒
                     (
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 436aafa..3484c5d 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -81,7 +81,10 @@ object NCSortEnricher extends NCProbeEnricher {
             "SORT x BY ORDER"
         )
 
-    case class NoteData(note: String, indexes: Seq[Int])
+    case class NoteData(note: String, indexes: Seq[Int]) {
+        // Added for debug reasons.
+        override def toString: String = s"NoteData [note=$note, indexes=[${indexes.mkString(",")}]]"
+    }
 
     private case class Match(
         asc: Option[Boolean],
@@ -147,47 +150,60 @@ object NCSortEnricher extends NCProbeEnricher {
       *
       * @param toks
       */
-    private def split(toks: Seq[NCNlpSentenceToken]): Seq[Seq[NoteData]] = {
+    private def split(toks: Seq[NCNlpSentenceToken], nullable: Boolean): Seq[Seq[NoteData]] = {
         val all =
             toks.flatten.
                 filter(!_.isNlp).map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
                 sortBy(_.indexes.head).distinct
 
-        if (all.nonEmpty) {
-            val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
+        val res =
+            if (all.nonEmpty) {
+                val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
 
-            /**
-              * Returns flag which indicates are token contiguous or not.
-              *
-              * @param tok1Idx First token index.
-              * @param tok2Idx Second token index.
-              */
-            def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
-                val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
+                /**
+                  * Returns a flag indicating whether the two tokens are contiguous.
+                  *
+                  * @param tok1Idx First token index.
+                  * @param tok2Idx Second token index.
+                  */
+                def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
+                    val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
 
-                between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
-            }
+                    between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
+                }
+
+                def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
+                    seq += nd
 
-            def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
-                seq += nd
+                    all.
+                        filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
+                        foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
 
-                all.
-                    filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
-                    foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
+                    if (seq.nonEmpty &&
+                        seq.head.indexes.head == all.head.indexes.head &&
+                        seq.last.indexes.last == all.last.indexes.last
+                    )
+                        res += seq
+                }
+
+                val minIndex = all.map(_.indexes.head).min
+
+                all.filter(_.indexes.head == minIndex).foreach(p ⇒ fill(p))
 
-                if (seq.nonEmpty &&
-                    seq.head.indexes.head == all.head.indexes.head &&
-                    seq.last.indexes.last == all.last.indexes.last
-                )
-                    res += seq
+                res
             }
+            else
+                Seq.empty
 
-            fill(all.head)
+        if (res.isEmpty && !nullable)
+            throw new AssertionError(s"Invalid nullable result " +
+                s"[tokens=[${toks.map(_.origText).mkString(",")}]" +
+                s", tokensIndexes=[${toks.map(_.index).mkString(",")}]" +
+                s", allData=[${all.mkString(",")}]" +
+                s"]"
+            )
 
-            res
-        }
-        else
-            Seq.empty
+        res
     }
 
     /**
@@ -300,8 +316,8 @@ object NCSortEnricher extends NCProbeEnricher {
                                 },
                                 main = h.sort.tokens,
                                 stop = h.byTokens ++ h.orderTokens,
-                                subjSeq = split(subj),
-                                bySeq = split(by)
+                                subjSeq = split(subj, nullable = false),
+                                bySeq = split(by, nullable = true)
                             )
                         )
                     }
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
index 5e519a2..c59174f 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -20,7 +20,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers
 import org.apache.nlpcraft.model.tools.test.{NCTestClient, NCTestClientBuilder}
 import org.apache.nlpcraft.probe.embedded.NCEmbeddedProbe
 
-import org.junit.jupiter.api.Assertions.assertTrue
+import org.junit.jupiter.api.Assertions.{assertTrue, fail}
 import org.junit.jupiter.api.{AfterEach, BeforeEach}
 import org.scalatest.Assertions
 
@@ -51,8 +51,10 @@ class NCEnricherBaseSpec {
     private[enrichers] def checkExists(txt: String, expToks: NCTestToken*): Unit = {
         val res = client.ask(txt)
 
-        assertTrue(res.isOk, s"Result is not successful, result: $res")
-        assertTrue(res.getResult.isPresent, s"Result is not successful, result: $res")
+        if (res.isFailed)
+            fail(s"Result failed [text=$txt, error=${res.getResultError.get()}]")
+
+        assertTrue(res.getResult.isPresent, s"Missed result data")
 
         val sens = NCTestSentence.deserialize(res.getResult.get())
         val expSen = NCTestSentence(expToks)
@@ -72,8 +74,10 @@ class NCEnricherBaseSpec {
     private[enrichers] def checkAll(txt: String, expToks: Seq[NCTestToken]*): Unit = {
         val res = client.ask(txt)
 
-        assertTrue(res.isOk, s"Result is not successful, result: $res")
-        assertTrue(res.getResult.isPresent, s"Result is not successful, result: $res")
+        if (res.isFailed)
+            fail(s"Result failed [text=$txt, error=${res.getResultError.get()}]")
+
+        assertTrue(res.getResult.isPresent, s"Missed result data")
 
         val expSens = expToks.map(NCTestSentence(_))
         val sens = NCTestSentence.deserialize(res.getResult.get())
@@ -108,7 +112,7 @@ class NCEnricherBaseSpec {
 
         if (errs.nonEmpty) {
             errs.foreach { case (err, i) ⇒
-                System.err.println(s"${i + 1} test failed.")
+                System.err.println(s"${i + 1}. Test failed: ${err.getLocalizedMessage}")
 
                 err.printStackTrace()
             }
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 43e0e39..854a002 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -100,17 +100,17 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
                 usr(text = "A", id = "A"),
                 srt(text = "classify", subjNote = "A", subjIndex = 0)
             )
-//            ,
-//            _ ⇒ checkAll(
-//                "D classify",
-//                Seq(
-//                    usr(text = "D", id = "D1"),
-//                    srt(text = "classify", subjNote = "D1", subjIndex = 0)
-//                ),
-//                Seq(
-//                    usr(text = "D", id = "D2"),
-//                    srt(text = "classify", subjNote = "D2", subjIndex = 0)
-//                )
-//            )
+            ,
+            _ ⇒ checkAll(
+                "D classify",
+                Seq(
+                    usr(text = "D", id = "D1"),
+                    srt(text = "classify", subjNote = "D1", subjIndex = 0)
+                ),
+                Seq(
+                    usr(text = "D", id = "D2"),
+                    srt(text = "classify", subjNote = "D2", subjIndex = 0)
+                )
+            )
         )
 }


[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-16
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit ecef2c60bbf809819226ddb5f4ef27b8c3d81ac8
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Mar 17 16:49:52 2020 +0300

    WIP.
---
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 50 ++++++++++++----------
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   |  8 ++--
 .../mgrs/nlp/enrichers/NCEnricherBaseSpec.scala    |  2 +-
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    | 24 ++++++++++-
 4 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 7a77713..3128eaf 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -353,7 +353,9 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
         ns: NCNlpSentence,
         history: Seq[(Int, Int)]
     ): Boolean = {
-        ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
+        var ok = true
+
+        for (tok ← ns.filter(_.isTypeOf(noteType)) if ok)
             tok.getNoteOpt(noteType, idxsField) match {
                 case Some(n) ⇒
                     val idxs: Seq[Seq[Int]] = n.data[java.util.List[java.util.List[Int]]](idxsField).asScala.map(_.asScala)
@@ -361,32 +363,34 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
 
                     history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct) }
 
-                    fixed.foreach(p ⇒ require(p.size == 1))
+                    if (fixed.forall(_.size == 1)) {
+                        // Fix double dimension array to one dimension,
+                        // so it should always be called, in spite of the fixIndexesReferences method.
+                        n += idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable]
 
-                    // Fix double dimension array to one dimension,
-                    // so it should be called always inspite of fixIndexesReferences method.
-                    n += idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable]
+                        def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
 
-                    def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
-
-                    logger.trace(s"`$noteType` note `indexes` fixed [old=${x(idxs)}}, new=${x(fixed)}]")
+                        logger.trace(s"`$noteType` note `indexes` fixed [old=${x(idxs)}}, new=${x(fixed)}]")
+                    }
+                    else
+                        ok = false
                 case None ⇒ // No-op.
             }
-        )
 
-        ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
-            rel.dataOpt[java.util.List[Int]](idxsField) match {
-                case Some(idxsList) ⇒
-                    val notesTypes = rel.data[util.List[String]](noteField)
+        ok &&
+            ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
+                rel.dataOpt[java.util.List[Int]](idxsField) match {
+                    case Some(idxsList) ⇒
+                        val notesTypes = rel.data[util.List[String]](noteField)
 
-                    require(idxsList.size() == notesTypes.size())
+                        require(idxsList.size() == notesTypes.size())
 
-                    idxsList.asScala.zip(notesTypes.asScala).forall {
-                        case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel.id)
-                    }
-                case None ⇒ true
-            }
-        )
+                        idxsList.asScala.zip(notesTypes.asScala).forall {
+                            case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel.id)
+                        }
+                    case None ⇒ true
+                }
+            )
     }
 
     /**
@@ -675,8 +679,6 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
                 toks.flatten.filter(!_.isNlp).distinct
 
-            val notNlpTypes = getNotNlpNotes(ns).map(_.noteType).distinct
-
             val delCombs: Seq[NCNlpSentenceNote] =
                 getNotNlpNotes(ns).
                     flatMap(note ⇒ getNotNlpNotes(ns.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
@@ -713,6 +715,8 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
 
                                     deleted += delComb
 
+                                    val notNlpTypes = getNotNlpNotes(nsClone).map(_.noteType).distinct
+
                                     if (collapse(nsClone, notNlpTypes, idCache)) Some(nsClone) else None
                                 }
                                 else
@@ -760,7 +764,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                     m.values.map(_.sentence).toSeq
                 }
                 else {
-                    if (collapse(ns, notNlpTypes, idCache)) Seq(ns) else Seq.empty
+                    if (collapse(ns, getNotNlpNotes(ns).map(_.noteType).distinct, idCache)) Seq(ns) else Seq.empty
                 }.distinct
 
             sens.foreach(sen ⇒
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 3484c5d..683dd6f 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -196,10 +196,10 @@ object NCSortEnricher extends NCProbeEnricher {
                 Seq.empty
 
         if (res.isEmpty && !nullable)
-            throw new AssertionError(s"Invalid nullable result " +
-                s"[tokens=[${toks.map(_.origText).mkString(",")}]" +
-                s", tokensIndexes=[${toks.map(_.index).mkString(",")}]" +
-                s", allData=[${all.mkString(",")}]" +
+            throw new AssertionError(s"Invalid null result " +
+                s"[tokens=[${toks.map(_.origText).mkString(", ")}]" +
+                s", tokensIndexes=[${toks.map(_.index).mkString(", ")}]" +
+                s", allData=[${all.mkString(", ")}]" +
                 s"]"
             )
 
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
index c59174f..8566a57 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherBaseSpec.scala
@@ -32,7 +32,7 @@ class NCEnricherBaseSpec {
 
     @BeforeEach
     private[enrichers] def setUp(): Unit = {
-        NCEmbeddedProbe.start(classOf[NCEnricherTestModel])
+        //NCEmbeddedProbe.start(classOf[NCEnricherTestModel])
 
         client = new NCTestClientBuilder().newBuilder.setResponseLog(false).build
 
diff --git a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 854a002..0588104 100644
--- a/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -64,6 +64,22 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
                 usr(text = "C", id = "C")
             ),
             _ ⇒ checkExists(
+                "sort A B by A B",
+                srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes = Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
+                usr(text = "A", id = "A"),
+                usr(text = "B", id = "B"),
+                nlp(text = "by", isStop = true),
+                usr(text = "A", id = "A"),
+                usr(text = "B", id = "B")
+            ),
+            _ ⇒ checkExists(
+                "sort A B by A B",
+                srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote = "AB", byIndex = 3),
+                usr(text = "A B", id = "AB"),
+                nlp(text = "by", isStop = true),
+                usr(text = "A B", id = "AB")
+            ),
+            _ ⇒ checkExists(
                 "A classify",
                 usr(text = "A", id = "A"),
                 srt(text = "classify", subjNote = "A", subjIndex = 0)
@@ -99,8 +115,12 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
                 "A classify",
                 usr(text = "A", id = "A"),
                 srt(text = "classify", subjNote = "A", subjIndex = 0)
-            )
-            ,
+            ),
+            _ ⇒ checkExists(
+                "A B classify",
+                usr(text = "A B", id = "AB"),
+                srt(text = "classify", subjNote = "AB", subjIndex = 0)
+            ),
             _ ⇒ checkAll(
                 "D classify",
                 Seq(