You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/03/10 00:37:49 UTC
[incubator-nlpcraft] 06/17: WIP.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 3b5a380390d346b24391311c7ec771c4ff9f1fac
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sun Mar 7 15:45:09 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 25 +++++++++++++------
.../nlpcraft/common/util/NCComboRecursiveTask.java | 29 ++++++++--------------
.../model/NCEnricherNestedModelSpec.scala | 18 +++++++-------
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 9d9cb98..95a98a3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -626,6 +626,8 @@ object NCNlpSentence extends LazyLogging {
flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)).
distinct
+ println("delCombs="+delCombs.mkString("\n"))
+
// Optimization. Deletes all wholly swallowed notes.
val links = getLinks(thisSen.flatten)
@@ -670,6 +672,7 @@ object NCNlpSentence extends LazyLogging {
// }
val dict = mutable.HashMap.empty[String, NCNlpSentenceNote]
+ val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String]
var i = 'A'
@@ -677,11 +680,20 @@ object NCNlpSentence extends LazyLogging {
toksByIdx.map(seq ⇒ {
seq.map(
n ⇒ {
- val s = s"$i"
+ val s =
+ dictBack.get(n) match {
+ case Some(s) ⇒ s
+ case None ⇒ {
+ val s = s"$i"
+
+ i = (i.toInt + 1).toChar
- i = (i.toInt + 1).toChar
+ dict += s → n
+ dictBack += n → s
- dict += s → n
+ s
+ }
+ }
s
}
@@ -695,16 +707,13 @@ object NCNlpSentence extends LazyLogging {
val p = new ForkJoinPool()
val tmp = NCComboRecursiveTask.findCombinations(
- converted.map(_.asJava).asJava,
- new Comparator[String]() {
- override def compare(n1: String, n2: String): Int = n1.compareTo(n2)
- },
+ toksByIdx.map(_.asJava).asJava,
p
)
p.shutdown()
- val seq1 = tmp.asScala.map(_.asScala.map(dict))
+ val seq1 = tmp.asScala.map(_.asScala)
val sens =
seq1.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
index 017c10e..834735b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java
@@ -19,7 +19,9 @@ package org.apache.nlpcraft.common.util;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;
@@ -42,30 +44,17 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
this.wordCounts = wordCounts;
}
- public static <T> List<List<T>> findCombinations(List<List<T>> inp, Comparator<T> comparator, ForkJoinPool pool) {
- List<List<T>> uniqueInp = inp.stream()
- .filter(row -> inp.stream().noneMatch(it -> !it.equals(row) && it.containsAll(row)))
- .map(i -> i.stream().distinct().sorted(comparator).collect(toList()))
- .collect(toList());
-
-
- System.out.println("!!!");
- for (List<T> ts : uniqueInp) {
- System.out.println("!!!ts=");
- System.out.println(ts.stream().map(Object::toString).collect(Collectors.joining("\n")));
- }
- System.out.println("!!!");
+ public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) {
+ List<List<T>> uniqueInp = inp;
// Build dictionary of unique words.
List<T> dict = uniqueInp.stream()
.flatMap(Collection::stream)
.distinct()
- .sorted(comparator)
.collect(toList());
- System.out.println("dict=");
- System.out.println(dict.stream().map(Object::toString).collect(Collectors.joining("\n")));
- System.out.println();
+ System.out.println("inp=" + inp);
+ System.out.println("dict=" + dict);
if (dict.size() > Long.SIZE) {
// Note: Power set of 64 words results in 9223372036854775807 combinations.
@@ -87,7 +76,11 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> {
NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts);
- return pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+ final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+
+ System.out.println("res=" + res);
+
+ return res;
}
@Override
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 9290d56..658feda 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -47,15 +47,15 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
@Test
def test(): Unit =
runBatch(
- _ ⇒ checkExists(
- "tomorrow",
- usr(text = "tomorrow", id = "x3")
- ),
- _ ⇒ checkExists(
- "tomorrow yesterday",
- usr(text = "tomorrow", id = "x3"),
- usr(text = "yesterday", id = "x3")
- ),
+// _ ⇒ checkExists(
+// "tomorrow",
+// usr(text = "tomorrow", id = "x3")
+// ),
+// _ ⇒ checkExists(
+// "tomorrow yesterday",
+// usr(text = "tomorrow", id = "x3"),
+// usr(text = "yesterday", id = "x3")
+// ),
_ ⇒ checkExists(
"y y",
usr(text = "y y", id = "y3")