You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/28 10:29:46 UTC

[incubator-nlpcraft] branch NLPCRAFT-456 updated: Code cleanup.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-456
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-456 by this push:
     new cfe489f  Code cleanup.
cfe489f is described below

commit cfe489fb8f525e6ef3f770d28be3739d87ae54b5
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Sep 28 13:29:39 2021 +0300

    Code cleanup.
---
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    |   1 -
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 138 ++++++++++++---------
 2 files changed, 76 insertions(+), 63 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 255e086..fbf4f01 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -36,7 +36,6 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
     private lazy val dataWithoutIndexes = this.filter(p => !SKIP_CLONE.contains(p._1))
     private lazy val skipNlp = dataWithoutIndexes.filter { case (key, _) => key != "noteType" }
 
-
     @transient
     private lazy val hash = values.hashCode()
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index f9f7a01..f1a538e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -599,15 +599,15 @@ object NCSentenceManager extends NCService {
         }).toMap)
 
     /**
-      * This collapser handles several tasks:
-      * - "overall" collapsing after all other individual collapsers had their turn.
-      * - Special further enrichment of tokens like linking, etc.
       *
-      * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
-      * lengths - the winning note is chosen based on this priority.
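+      * @param sen Sentence to build variants from; it is cloned once per deletion combination.
+      * @param mdl Model passed to `dropAbstract` when `lastPhase` is set.
+      * @param lastPhase If `true`, `dropAbstract` is applied to each variant before it is collapsed.
+      * @param delNotes Candidate notes for deletion, used to build combinations; if empty, `sen` itself is collapsed.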
       */
-    @throws[NCE]
-    private def collapseSentence(sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
+    private def mkVariants(
+        sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean, delNotes: Seq[NCNlpSentenceNote]
+    ): Seq[NCNlpSentence] = {
         def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
             if (lastPhase)
                 dropAbstract(mdl, ns)
@@ -615,6 +615,70 @@ object NCSentenceManager extends NCService {
             if (collapseSentence(ns, getNotNlpNotes(ns.tokens).map(_.noteType).distinct, lastPhase)) Some(ns) else None
         }
 
+        if (delNotes.nonEmpty) {
+            val notesSeqs: Seq[Set[NCNlpSentenceNote]] =
+                delNotes.flatMap(note => note.wordIndexes.map(_ -> note)).
+                    groupBy { case (idx, _) => idx }.
+                    map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
+                    toSeq.
+                    sortBy(-_.size)
+
+            def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
+                NCSentenceHelper.findCombinations(notesSeqs.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
+
+            val delCombs: Seq[Seq[NCNlpSentenceNote]] = combCache.
+                getOrElseUpdate(sen.srvReqId, mutable.HashMap.empty[CacheKey, CacheValue]).
+                getOrElseUpdate(notesSeqs, findCombinations())
+
+            val seqSens =
+                delCombs.
+                    par.
+                    flatMap(delComb => {
+                        val nsClone = sen.clone()
+
+                        // Saves deleted notes for sentence and their tokens.
+                        addDeleted(sen, nsClone, delComb)
+                        delComb.foreach(nsClone.removeNote)
+
+                        // After the deletion no token may still have more than one non-NLP note.
+                        require(!nsClone.exists(_.count(!_.isNlp) > 1))
+
+                        collapse0(nsClone)
+                    }).seq
+
+            // Drops sentences that differ from each other only in the 'direct' flag of their notes.
+            // Among such duplicates, the sentence with the smallest `factor` (fewest non-direct notes counted) is kept.
+            type Key = Seq[Map[String, JSerializable]]
+            case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
+
+            def mkHolder(sen: NCNlpSentence): Holder = {
+                val notes = sen.flatten
+
+                Holder(
+                    // The 'direct' key is removed so that sentences differing only in it get equal keys.
+                    notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
+                    sen,
+                    notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
+                )
+            }
+
+            seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq
+        }
+        else
+            collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
+
+    }
+
+    /**
+      * This collapser handles several tasks:
+      * - "overall" collapsing after all other individual collapsers had their turn.
+      * - Special further enrichment of tokens like linking, etc.
+      *
+      * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
+      * lengths - the winning note is chosen based on this priority.
+      */
+    @throws[NCE]
+    private def collapseSentence(sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
         // Always deletes `similar` notes.
         // Some words with same note type can be detected various ways.
         // We keep only one variant -  with `best` direct and sparsity parameters,
@@ -644,7 +708,7 @@ object NCSentenceManager extends NCService {
 
         redundant.foreach(sen.removeNote)
 
-        var delCombs: Seq[NCNlpSentenceNote] =
+        var delNotes: Seq[NCNlpSentenceNote] =
             getNotNlpNotes(sen.tokens).
                 flatMap(note => getNotNlpNotes(note.tokenIndexes.map(sen(_))).filter(_ != note)).
                 distinct
@@ -653,7 +717,7 @@ object NCSentenceManager extends NCService {
         val links = getLinks(sen.tokens.toSeq.flatten)
 
         val swallowed =
-            delCombs.
+            delNotes.
                 // There aren't links on it.
                 filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
                 // It doesn't have links.
@@ -663,7 +727,7 @@ object NCSentenceManager extends NCService {
                     val key = NCTokenPartKey(note, sen)
 
                     val delCombOthers =
-                        delCombs.filter(_ != note).flatMap(n => if (getPartKeys(n).contains(key)) Some(n) else None)
+                        delNotes.filter(_ != note).flatMap(n => if (getPartKeys(n).contains(key)) Some(n) else None)
 
                     if (
                         delCombOthers.nonEmpty &&
@@ -674,61 +738,11 @@ object NCSentenceManager extends NCService {
                         None
                 })
 
-        delCombs = delCombs.filter(p => !swallowed.contains(p))
+        delNotes = delNotes.filter(p => !swallowed.contains(p))
         addDeleted(sen, sen, swallowed)
         swallowed.foreach(sen.removeNote)
 
-        var sens =
-            if (delCombs.nonEmpty) {
-                val toksByIdx =
-                    delCombs.flatMap(note => note.wordIndexes.map(_ -> note)).
-                        groupBy { case (idx, _) => idx }.
-                        map { case (_, seq) => seq.map { case (_, note) => note }.toSet }.
-                        toSeq.sortBy(-_.size)
-
-                def findCombinations(): Seq[Seq[NCNlpSentenceNote]] =
-                    NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala.toSeq)
-
-                val seqSens =
-                    combCache.
-                        getOrElseUpdate(sen.srvReqId, mutable.HashMap.empty[CacheKey, CacheValue]).
-                        getOrElseUpdate(
-                            toksByIdx,
-                            findCombinations()
-                        ).par.
-                        flatMap(delComb => {
-                            val nsClone = sen.clone()
-
-                            // Saves deleted notes for sentence and their tokens.
-                            addDeleted(sen, nsClone, delComb)
-                            delComb.foreach(nsClone.removeNote)
-
-                            // Has overlapped notes for some tokens.
-                            require(!nsClone.exists(_.count(!_.isNlp) > 1))
-
-                            collapse0(nsClone)
-                        }).seq
-
-                // It removes sentences which have only one difference - 'direct' flag of their user tokens.
-                // `Direct` sentences have higher priority.
-                type Key = Seq[Map[String, JSerializable]]
-                case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
-
-                def mkHolder(sen: NCNlpSentence): Holder = {
-                    val notes = sen.flatten
-
-                    Holder(
-                        // We have to delete some keys to have possibility to compare sentences.
-                        notes.map(_.clone().toMap.filter { case (name, _) => name != "direct" }).toSeq,
-                        sen,
-                        notes.filter(_.isNlp).map(p => if (p.isDirect) 0 else 1).sum
-                    )
-                }
-
-                seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) => seq.minBy(_.factor).sentence }.toSeq
-            }
-            else
-                collapse0(sen).flatMap(p => Option(Seq(p))).getOrElse(Seq.empty)
+        var sens = mkVariants( sen, mdl, lastPhase, delNotes)
 
         sens.par.foreach(sen =>
             sen.foreach(tok =>