You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/03/10 00:37:49 UTC

[incubator-nlpcraft] 06/17: WIP.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 3b5a380390d346b24391311c7ec771c4ff9f1fac
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sun Mar 7 15:45:09 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 25 +++++++++++++------
 .../nlpcraft/common/util/NCComboRecursiveTask.java | 29 ++++++++--------------
 .../model/NCEnricherNestedModelSpec.scala          | 18 +++++++-------
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 9d9cb98..95a98a3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -626,6 +626,8 @@ object NCNlpSentence extends LazyLogging {
                 flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
                 distinct
 
+        println("delCombs="+delCombs.mkString("\n"))
+
         // Optimization. Deletes all wholly swallowed notes.
         val links = getLinks(thisSen.flatten)
 
@@ -670,6 +672,7 @@ object NCNlpSentence extends LazyLogging {
 //        }
 
         val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
+        val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String]
 
         var i = 'A'
 
@@ -677,11 +680,20 @@ object NCNlpSentence extends LazyLogging {
             toksByIdx.map(seq ⇒ {
                 seq.map(
                     n ⇒ {
-                        val s = s"$i"
+                        val s =
+                            dictBack.get(n) match {
+                                case Some(s) ⇒ s
+                                case None ⇒ {
+                                    val s = s"$i"
+
+                                    i = (i.toInt + 1).toChar
 
-                        i = (i.toInt + 1).toChar
+                                    dict += s → n
+                                    dictBack += n → s
 
-                        dict += s → n
+                                    s
+                                }
+                            }
 
                         s
                     }
@@ -695,16 +707,13 @@ object NCNlpSentence extends LazyLogging {
                 val p = new ForkJoinPool()
 
                 val tmp = NCComboRecursiveTask.findCombinations(
-                    converted.map(_.asJava).asJava,
-                    new Comparator[String]() {
-                        override def compare(n1: String, n2: String): Int = n1.compareTo(n2)
-                    },
+                    toksByIdx.map(_.asJava).asJava,
                     p
                 )
 
                 p.shutdown()
 
-                val seq1 = tmp.asScala.map(_.asScala.map(dict))
+                val seq1 = tmp.asScala.map(_.asScala)
 
                 val sens =
                     seq1.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
index 017c10e..834735b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
@@ -19,7 +19,9 @@ package org.apache.nlpcraft.common.util;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.RecursiveTask;
@@ -42,30 +44,17 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
         this.wordCounts = wordCounts;
     }
 
-    public static <T> List<List<T>> findCombinations(List<List<T>> inp, Comparator<T> comparator, ForkJoinPool pool) {
-        List<List<T>> uniqueInp = inp.stream()
-            .filter(row -> inp.stream().noneMatch(it -> !it.equals(row)  && it.containsAll(row)))
-            .map(i -> i.stream().distinct().sorted(comparator).collect(toList()))
-            .collect(toList());
-
-
-        System.out.println("!!!");
-        for (List<T> ts : uniqueInp) {
-            System.out.println("!!!ts=");
-            System.out.println(ts.stream().map(Object::toString).collect(Collectors.joining("\n")));
-        }
-        System.out.println("!!!");
+    public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
+        List<List<T>> uniqueInp = inp;
 
         // Build dictionary of unique words.
         List<T> dict = uniqueInp.stream()
             .flatMap(Collection::stream)
             .distinct()
-            .sorted(comparator)
             .collect(toList());
 
-        System.out.println("dict=");
-        System.out.println(dict.stream().map(Object::toString).collect(Collectors.joining("\n")));
-        System.out.println();
+        System.out.println("inp=" + inp);
+        System.out.println("dict=" + dict);
 
         if (dict.size() > Long.SIZE) {
             // Note: Power set of 64 words results in 9223372036854775807 combinations.
@@ -87,7 +76,11 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
 
         NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
 
-        return pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+        final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+
+        System.out.println("res=" + res);
+
+        return res;
     }
 
     @Override
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 9290d56..658feda 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -47,15 +47,15 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
     @Test
     def test(): Unit =
         runBatch(
-            _ ⇒ checkExists(
-                "tomorrow",
-                usr(text = "tomorrow", id = "x3")
-            ),
-            _ ⇒ checkExists(
-                "tomorrow yesterday",
-                usr(text = "tomorrow", id = "x3"),
-                usr(text = "yesterday", id = "x3")
-            ),
+//            _ ⇒ checkExists(
+//                "tomorrow",
+//                usr(text = "tomorrow", id = "x3")
+//            ),
+//            _ ⇒ checkExists(
+//                "tomorrow yesterday",
+//                usr(text = "tomorrow", id = "x3"),
+//                usr(text = "yesterday", id = "x3")
+//            ),
             _ ⇒ checkExists(
                 "y y",
                 usr(text = "y y", id = "y3")