You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/18 09:05:14 UTC

[incubator-nlpcraft] 03/08: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 1ccf5c5cc5a489b2565500d31209e4af19aa8187
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 16 12:10:22 2021 +0300

    WIP.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 25 ++++++++++++++++++----
 .../model/NCEnricherNestedModelSpec.scala          |  3 +--
 .../nlp/enrichers/sort/NCEnricherSortSpec.scala    |  3 +--
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6908265..22af412 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -450,6 +450,22 @@ object NCModelEnricher extends NCProbeEnricher {
             )
     }
 
+    /**
+      * Extends matched tokens with the stop words located strictly between the first and the last matched token.
+      * @param matched Non-empty matched tokens (`require` fails otherwise), expected to be sorted by index.
+      * @param toks2Match Tokens scanned for stop words that lie inside the matched range.
+      */
+    private def getSparsedTokens(matched: Seq[NlpToken], toks2Match: Seq[NlpToken]): Seq[NlpToken] = {
+        require(matched.nonEmpty)

+        // Matched tokens should be already sorted. Stop words already present in the match are skipped to avoid duplicates.
+        val stopsInside = toks2Match.filter(t =>
+            t.isStopWord && !matched.contains(t) && t.index > matched.head.index && t.index < matched.last.index
+        )

+        if (stopsInside.nonEmpty) (matched ++ stopsInside).sortBy(_.index) else matched
+    }
+
     @throws[NCE]
     override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
         require(isStarted)
@@ -526,9 +542,10 @@ object NCModelEnricher extends NCProbeEnricher {
                                 for (s <- get(mdl.sparseSynonyms, eId))
                                     s.sparseMatch(toks) match {
                                         case Some(res) =>
-                                            println("!!!toks="+toks.map(_.origText))
-                                            println("!!!res="+res.map(_.origText))
-                                            add("simple sparse", ns, contCache, eId, greedy, res, idxs, s)
+//                                            println("!!!toks="+toks.map(_.origText))
+//                                            println("!!!res="+res.map(_.origText))
+//                                            println
+                                            add("simple sparse", ns, contCache, eId, greedy, getSparsedTokens(res, toks), idxs, s)
                                         case None => // No-op.
                                     }
                         }
@@ -566,7 +583,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                         case Some(res) =>
                                             val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
 
-                                            add(typ, ns, contCache, eId, greedy, toTokens(res, ns), idxs, s, toParts(res, s))
+                                            add(typ, ns, contCache, eId, greedy, getSparsedTokens(toTokens(res, ns), toTokens(comb.map(_.data), ns)), idxs, s, toParts(res, s))
 
                                             idlCache += comb
                                         case None => // No-op.
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 4d5d991..8b25e87 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -94,8 +94,7 @@ class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 {
             ),
             _ => checkExists(
                 "y the y",
-                usr(text = "y y", id = "y3"),
-                nlp(text = "the", isStop = true)
+                usr(text = "y the y", id = "y3")
             ),
             _ => checkExists(
                 "y xxx y",
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index 228885d..7b8d858 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -224,8 +224,7 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
             _ => checkExists(
                 "sort A the A the A",
                 srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1),
-                usr("A A A", "wrapperA"),
-                nlp("the the", isStop = true)
+                usr("A the A the A", "wrapperA")
             )
         )
 }