Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/09 18:40:33 UTC

[incubator-nlpcraft] branch master updated: Sentence collapsing performance improvements.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b19154  Sentence collapsing performance improvements.
1b19154 is described below

commit 1b19154436d30602ef6a0174cb1f2148d9714bfc
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Mar 9 21:40:17 2021 +0300

    Sentence collapsing performance improvements.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 773 +--------------------
 .../org/apache/nlpcraft/probe/NCProbeBoot.scala    |   2 +
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |   4 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |   9 +-
 .../probe/mgrs/sentence/NCSentenceHelper.java      | 199 ++++++
 .../mgrs/sentence/NCSentenceManager.scala}         | 351 +++-------
 .../model/NCEnricherNestedModelSpec4.scala         |  53 ++
 8 files changed, 397 insertions(+), 997 deletions(-)
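
In short, this commit moves sentence collapsing out of NCNlpSentence into the new
NCSentenceManager service and offloads the combinatorial search for deletable note
combinations to a Java fork/join task (NCSentenceHelper). A minimal sketch of the
resulting call-site migration, using the names from the hunks below:

    // Before this commit: collapsing was an instance method of the sentence.
    val sens = nlpSen.clone().collapse(mdl.model, lastPhase = true)

    // After: collapsing is delegated to the new manager service.
    val sens = NCSentenceManager.collapse(mdl.model, nlpSen.clone(), lastPhase = true)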

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 113e088..91ca5a9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -17,527 +17,16 @@
 
 package org.apache.nlpcraft.common.nlp
 
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common.NCE
-import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
-import org.apache.nlpcraft.model.NCModel
-
-import java.util
-import java.util.{List ⇒ JList}
 import java.io.{Serializable ⇒ JSerializable}
-import java.util.Collections
+import java.util.{Collections, List ⇒ JList}
 import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
 import scala.collection.{Map, Seq, Set, mutable}
 import scala.language.implicitConversions
 
-object NCNlpSentence extends LazyLogging {
-    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
-
+object NCNlpSentence {
     case class NoteKey(start: Int, end: Int)
     case class TokenKey(id: String, start: Int, end: Int)
     case class NoteLink(note: String, indexes: Seq[Int])
-
-    case class PartKey(id: String, start: Int, end: Int) {
-        require(start <= end)
-
-        private def in(i: Int): Boolean = i >= start && i <= end
-        def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
-    }
-    object PartKey {
-        def apply(m: util.HashMap[String, JSerializable]): PartKey = {
-            def get[T](name: String): T = m.get(name).asInstanceOf[T]
-
-            PartKey(get("id"), get("startcharindex"), get("endcharindex"))
-        }
-
-        def apply(t: NCNlpSentenceNote, sen: NCNlpSentence): PartKey =
-            PartKey(t.noteType, sen(t.tokenFrom).startCharIndex, sen(t.tokenTo).endCharIndex)
-    }
-
-    private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
-        val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
-
-        for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
-            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.sorted)
-
-        for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
-            def add(noteName: String, idxsName: String): Unit = {
-                val names = n(noteName).asInstanceOf[JList[String]]
-                val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
-
-                require(names.size() == idxsSeq.size())
-
-                noteLinks ++=
-                    (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
-                        yield NoteLink(name, idxs.sorted)
-                    )
-            }
-
-            if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
-            if (n.contains("bynotes")) add("bynotes", "byindexes")
-        }
-
-        noteLinks
-    }
-
-    private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
-        notes.
-            filter(_.isUser).
-            flatMap(n ⇒ {
-                val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
-
-                optList
-            }).flatMap(_.asScala).map(m ⇒ PartKey(m)).distinct
-
-    /**
-      *
-      * @param ns
-      * @param idxs
-      * @param notesType
-      * @param note
-      * @return
-      */
-    private def checkRelation(ns: NCNlpSentence, idxs: Seq[Int], notesType: String, note: NCNlpSentenceNote): Boolean = {
-        val types = idxs.flatMap(idx ⇒ ns(idx).map(p ⇒ p).filter(!_.isNlp).map(_.noteType)).distinct
-
-        /**
-          * Example:
-          * 1. Sentence 'maximum x' (single element related function):
-          * - maximum is an aggregate function linked to the date element.
-          * - x is defined as 2 elements: date and num.
-          * So, the variant 'maximum x (as num)' should be excluded.
-          * *
-          * 2. Sentence 'compare x and y' (multiple elements related function):
-          * - compare is a relation function linked to the date element.
-          * - x and y are defined as 2 elements: date and num.
-          * So, the variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
-          * should not be excluded, but the invalid relation should be deleted for these combinations.
-          */
-        types.size match {
-            case 0 ⇒ false
-            case 1 ⇒ types.head == notesType
-            case _ ⇒
-                // Equal elements should be processed together with function element.
-                if (types.size == 1)
-                    false
-                else {
-                    ns.removeNote(note)
-
-                    logger.trace(s"Removed note: $note")
-
-                    true
-                }
-        }
-    }
-
-    /**
-      * Fixes notes with references to other notes' indexes.
-      * Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kinds of references.
-      *
-      * @param noteType Note type.
-      * @param idxsField Indexes field.
-      * @param noteField Note field.
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @return Valid flag.
-      */
-    private def fixIndexesReferences(
-        noteType: String,
-        idxsField: String,
-        noteField: String,
-        ns: NCNlpSentence,
-        history: Seq[(Int, Int)]
-    ): Boolean = {
-        ns.filter(_.isTypeOf(noteType)).foreach(tok ⇒
-            tok.getNoteOpt(noteType, idxsField) match {
-                case Some(n) ⇒
-                    val idxs: Seq[Int] = n.data[JList[Int]](idxsField).asScala
-                    var fixed = idxs
-
-                    history.foreach { case (idxOld, idxNew) ⇒ fixed = fixed.map(i ⇒ if (i == idxOld) idxNew else i) }
-
-                    fixed = fixed.distinct
-
-                    if (idxs != fixed)
-                        ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[JSerializable])
-                case None ⇒ // No-op.
-            }
-        )
-
-        ns.flatMap(_.getNotes(noteType)).forall(
-            n ⇒ checkRelation(ns, n.data[JList[Int]]("indexes").asScala, n.data[String](noteField), n)
-        )
-    }
-
-    /**
-      *
-      * @param note
-      * @param idxsField
-      * @param noteField
-      * @param ns
-      */
-    private def fixNoteIndexes(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit =
-        ns.flatMap(_.getNotes(note)).foreach(
-            n ⇒ checkRelation(ns, n.data[JList[Int]](idxsField).asScala, n.data[String](noteField), n)
-        )
-
-    /**
-      *
-      * @param note
-      * @param idxsField
-      * @param noteField
-      * @param ns
-      */
-    private def fixNoteIndexesList(note: String, idxsField: String, noteField: String, ns: NCNlpSentence): Unit = {
-        ns.flatMap(_.getNotes(note)).foreach(rel ⇒
-            rel.dataOpt[JList[JList[Int]]](idxsField) match {
-                case Some(idxsList) ⇒
-                    val notesTypes = rel.data[JList[String]](noteField)
-
-                    require(idxsList.size() == notesTypes.size())
-
-                    idxsList.asScala.zip(notesTypes.asScala).foreach {
-                        case (idxs, notesType) ⇒ checkRelation(ns, idxs.asScala, notesType, rel)
-                    }
-                case None ⇒ // No-op.
-            }
-        )
-    }
-
-    /**
-      * Copies token.
-      *
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @param toksCopy Copied tokens.
-      * @param i Index.
-      */
-    private def simpleCopy(
-        ns: NCNlpSentence,
-        history: mutable.ArrayBuffer[(Int, Int)],
-        toksCopy: NCNlpSentence, i: Int
-    ): Seq[NCNlpSentenceToken] = {
-        val tokCopy = toksCopy(i)
-
-        history += tokCopy.index → ns.size
-
-        ns += tokCopy.clone(ns.size)
-    }
-
-    /**
-      * Glues stop words.
-      *
-      * @param ns Sentence.
-      * @param userNoteTypes Notes types.
-      * @param history Indexes transformation history.
-      */
-    private def unionStops(
-        ns: NCNlpSentence,
-        userNoteTypes: Seq[String],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): Unit = {
-        // A Java collection is used here because Scala collections (mutable.Buffer.empty[mutable.Buffer[Token]])
-        // cause compilation errors that look like a Scala compiler internal error.
-        val bufs = new util.ArrayList[mutable.Buffer[NCNlpSentenceToken]]()
-
-        def last[T](l: JList[T]): T = l.get(l.size() - 1)
-
-        ns.filter(t ⇒ t.isStopWord && !t.isBracketed).foreach(t ⇒
-            if (!bufs.isEmpty && last(bufs).last.index + 1 == t.index)
-                last(bufs) += t
-            else
-                bufs.add(mutable.Buffer.empty[NCNlpSentenceToken] :+ t)
-        )
-
-        val idxsSeq = bufs.asScala.filter(_.lengthCompare(1) > 0).map(_.map(_.index))
-
-        if (idxsSeq.nonEmpty) {
-            val nsCopyToks = ns.clone()
-            ns.clear()
-
-            val buf = mutable.Buffer.empty[Int]
-
-            for (i ← nsCopyToks.indices)
-                idxsSeq.find(_.contains(i)) match {
-                    case Some(idxs) ⇒
-                        if (!buf.contains(idxs.head)) {
-                            buf += idxs.head
-
-                            ns += mkCompound(ns, nsCopyToks, idxs, stop = true, ns.size, None, history)
-                        }
-                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
-                }
-
-            fixIndexes(ns, userNoteTypes)
-        }
-    }
-
-    /**
-      * Fixes indexes for all notes after recreating tokens.
-      *
-      * @param ns Sentence.
-      * @param userNoteTypes Notes types.
-      */
-    private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]) {
-        // Replaces other notes indexes.
-        for (t ← userNoteTypes :+ "nlpcraft:nlp"; note ← ns.getNotes(t)) {
-            val toks = ns.filter(_.contains(note)).sortBy(_.index)
-
-            val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).sorted)
-
-            toks.foreach(t ⇒ {
-                t.remove(note)
-                t.add(newNote)
-            })
-        }
-
-        // Special case - field index of core NLP note.
-        ns.zipWithIndex.foreach { case (tok, idx) ⇒ ns.fixNote(tok.getNlpNote, "index" → idx) }
-    }
-
-    /**
-      * Zip notes with same type.
-      *
-      * @param ns Sentence.
-      * @param nType Notes type.
-      * @param userNotesTypes Notes types.
-      * @param history Indexes transformation history.
-      */
-    private def zipNotes(
-        ns: NCNlpSentence,
-        nType: String,
-        userNotesTypes: Seq[String],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): Unit = {
-        val nts = ns.getNotes(nType).filter(n ⇒ n.tokenFrom != n.tokenTo).sortBy(_.tokenFrom)
-
-        val overlapped =
-            nts.flatMap(n ⇒ n.tokenFrom to n.tokenTo).map(ns(_)).exists(
-                t ⇒ userNotesTypes.map(pt ⇒ t.getNotes(pt).size).sum > 1
-            )
-
-        if (nts.nonEmpty && !overlapped) {
-            val nsCopyToks = ns.clone()
-            ns.clear()
-
-            val buf = mutable.ArrayBuffer.empty[Int]
-
-            for (i ← nsCopyToks.indices)
-                nts.find(_.tokenIndexes.contains(i)) match {
-                    case Some(n) ⇒
-                        if (!buf.contains(n.tokenFrom)) {
-                            buf += n.tokenFrom
-
-                            ns += mkCompound(ns, nsCopyToks, n.tokenIndexes, stop = false, ns.size, Some(n), history)
-                        }
-                    case None ⇒ simpleCopy(ns, history, nsCopyToks, i)
-                }
-
-            fixIndexes(ns, userNotesTypes)
-        }
-    }
-
-    /**
-      * Makes compound note.
-      *
-      * @param ns Sentence.
-      * @param nsCopyToks Tokens.
-      * @param indexes Indexes.
-      * @param stop Flag.
-      * @param idx Index.
-      * @param commonNote Common note.
-      * @param history Indexes transformation history.
-      */
-    private def mkCompound(
-        ns: NCNlpSentence,
-        nsCopyToks: Seq[NCNlpSentenceToken],
-        indexes: Seq[Int],
-        stop: Boolean,
-        idx: Int,
-        commonNote: Option[NCNlpSentenceNote],
-        history: mutable.ArrayBuffer[(Int, Int)]
-    ): NCNlpSentenceToken = {
-        val t = NCNlpSentenceToken(idx)
-
-        // Note, it adds stop-words too.
-        val content = nsCopyToks.zipWithIndex.filter(p ⇒ indexes.contains(p._2)).map(_._1)
-
-        content.foreach(t ⇒ history += t.index → idx)
-
-        def mkValue(get: NCNlpSentenceToken ⇒ String): String = {
-            val buf = mutable.Buffer.empty[String]
-
-            val n = content.size - 1
-
-            content.zipWithIndex.foreach(p ⇒ {
-                val t = p._1
-                val idx = p._2
-
-                buf += get(t)
-
-                if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
-                    buf += " "
-            })
-
-            buf.mkString
-        }
-
-        val origText = mkValue((t: NCNlpSentenceToken) ⇒ t.origText)
-
-        val idxs = Seq(idx)
-        val wordIdxs = content.flatMap(_.wordIndexes).sorted
-
-        val direct =
-            commonNote match {
-                case Some(n) if n.isUser ⇒ n.isDirect
-                case _ ⇒ content.forall(_.isDirect)
-            }
-
-        val params = Seq(
-            "index" → idx,
-            "pos" → NCPennTreebank.SYNTH_POS,
-            "posDesc" → NCPennTreebank.SYNTH_POS_DESC,
-            "lemma" → mkValue((t: NCNlpSentenceToken) ⇒ t.lemma),
-            "origText" → origText,
-            "normText" → mkValue((t: NCNlpSentenceToken) ⇒ t.normText),
-            "stem" → mkValue((t: NCNlpSentenceToken) ⇒ t.stem),
-            "start" → content.head.startCharIndex,
-            "end" → content.last.endCharIndex,
-            "charLength" → origText.length,
-            "quoted" → false,
-            "stopWord" → stop,
-            "bracketed" → false,
-            "direct" → direct,
-            "dict" → (if (nsCopyToks.size == 1) nsCopyToks.head.getNlpNote.data[Boolean]("dict") else false),
-            "english" → nsCopyToks.forall(_.getNlpNote.data[Boolean]("english")),
-            "swear" → nsCopyToks.exists(_.getNlpNote.data[Boolean]("swear"))
-        )
-
-        val nlpNote = NCNlpSentenceNote(idxs, wordIdxs, "nlpcraft:nlp", params: _*)
-
-        t.add(nlpNote)
-
-        // Adds processed note with fixed indexes.
-        commonNote match {
-            case Some(n) ⇒
-                ns.removeNote(n)
-                t.add(n.clone(idxs, wordIdxs))
-            case None ⇒ // No-op.
-        }
-
-        t
-    }
-
-    /**
-      * Fixes notes with reference lists to other notes' indexes.
-      *
-      * @param noteType Note type.
-      * @param idxsField Indexes field.
-      * @param noteField Note field.
-      * @param ns Sentence.
-      * @param history Indexes transformation history.
-      * @return Valid flag.
-      */
-    private def fixIndexesReferencesList(
-        noteType: String,
-        idxsField: String,
-        noteField: String,
-        ns: NCNlpSentence,
-        history: Seq[(Int, Int)]
-    ): Boolean = {
-        var ok = true
-
-        for (tok ← ns.filter(_.isTypeOf(noteType)) if ok)
-            tok.getNoteOpt(noteType, idxsField) match {
-                case Some(n) ⇒
-                    val idxs: Seq[Seq[Int]] =
-                        n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala)
-                    var fixed = idxs
-
-                    history.foreach {
-                        case (idxOld, idxNew) ⇒ fixed = fixed.map(_.map(i ⇒ if (i == idxOld) idxNew else i).distinct)
-                    }
-
-                    if (fixed.forall(_.size == 1))
-                        // Flattens the two-dimensional array to one dimension,
-                        // so it must always be called, unlike the 'fixIndexesReferences' method.
-                        ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[JSerializable])
-                    else
-                        ok = false
-                case None ⇒ // No-op.
-            }
-        ok &&
-            ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
-                rel.dataOpt[JList[Int]](idxsField) match {
-                    case Some(idxsList) ⇒
-                        val notesTypes = rel.data[JList[String]](noteField)
-
-                        require(idxsList.size() == notesTypes.size())
-
-                        idxsList.asScala.zip(notesTypes.asScala).forall {
-                            case (idxs, notesType) ⇒ checkRelation(ns, Seq(idxs), notesType, rel)
-                        }
-                    case None ⇒ true
-                }
-            )
-    }
-
-    /**
-      * Fixes token positions.
-      *
-      * @param ns Sentence.
-      * @param notNlpTypes Token types.
-      */
-    private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
-        ns.
-            filter(!_.isNlp).
-            filter(_.isStopWord).
-            flatten.
-            filter(_.isNlp).
-            foreach(n ⇒ ns.fixNote(n, "stopWord" → false))
-
-        val all = ns.tokens.flatten
-        val nsNotes: Map[String, Seq[Int]] = all.map(p ⇒ p.noteType → p.tokenIndexes).toMap
-
-        for (
-            t ← ns.tokens; stopReason ← t.stopsReasons
-                if all.contains(stopReason) && nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
-        )
-            ns.fixNote(t.getNlpNote, "stopWord" → true)
-
-        val history = mutable.ArrayBuffer.empty[(Int, Int)]
-
-        fixNoteIndexes("nlpcraft:relation", "indexes", "note", ns)
-        fixNoteIndexes("nlpcraft:limit", "indexes", "note", ns)
-        fixNoteIndexesList("nlpcraft:sort", "subjindexes", "subjnotes", ns)
-        fixNoteIndexesList("nlpcraft:sort", "byindexes", "bynotes", ns)
-
-        notNlpTypes.foreach(typ ⇒ zipNotes(ns, typ, notNlpTypes, history))
-        unionStops(ns, notNlpTypes, history)
-
-        val res =
-            fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, history) &&
-            fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, history) &&
-            fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, history)
-
-        if (res) {
-            // Validation (all indexes were calculated correctly).
-            require(
-                !res ||
-                    !ns.flatten.
-                        exists(n ⇒ ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t ⇒ !t.contains(n))),
-                s"Invalid sentence:\n" +
-                    ns.map(t ⇒
-                        // Human readable invalid sentence for debugging.
-                        s"${t.origText}{index:${t.index}}[${t.map(n ⇒ s"${n.noteType}, {range:${n.tokenFrom}-${n.tokenTo}}").mkString("|")}]"
-                    ).mkString("\n")
-            )
-        }
-
-        res
-    }
 }
 
 import org.apache.nlpcraft.common.nlp.NCNlpSentence._
@@ -567,29 +56,16 @@ class NCNlpSentence(
     private def calcHash(): Int =
         Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
 
-    private def addDeleted(sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
-        sen.deletedNotes ++= dels.map(n ⇒ {
-            val savedDelNote = n.clone()
-            val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
-
-            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
-            // Deleted note's tokens should contain only NLP data and deleted notes.
-            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
-                savedDelTok.remove(mainNote)
-
-            savedDelNote → savedDelToks
-        })
-
     // Deep copy.
     override def clone(): NCNlpSentence =
         new NCNlpSentence(
-            srvReqId,
-            text,
-            enabledBuiltInToks,
-            tokens.map(_.clone()),
-            deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
-            initNlpNotes = initNlpNotes
+            srvReqId = srvReqId,
+            text = text,
+            enabledBuiltInToks = enabledBuiltInToks,
+            tokens = tokens.map(_.clone()),
+            deletedNotes = deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
+            initNlpNotes = initNlpNotes,
+            nlpTokens = nlpTokens
         )
 
     /**
@@ -617,6 +93,21 @@ class NCNlpSentence(
         hash
     }
 
+    override def equals(obj: Any): Boolean = obj match {
+        case x: NCNlpSentence ⇒
+            tokens == x.tokens &&
+                srvReqId == x.srvReqId &&
+                text == x.text &&
+                enabledBuiltInToks == x.enabledBuiltInToks
+
+        case _ ⇒ false
+    }
+
+    /**
+      *
+      * @param note
+      * @param kvs
+      */
     def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
         val fixed = note.clone(kvs: _*)
 
@@ -628,201 +119,6 @@ class NCNlpSentence(
         hash = null
     }
 
-    private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
-        if (!mdl.getAbstractTokens.isEmpty) {
-            val notes = ns.flatten
-
-            val keys = getPartKeys(notes :_*)
-            val noteLinks = getLinks(notes)
-
-            notes.filter(n ⇒ {
-                val noteToks = ns.tokens.filter(_.contains(n))
-
-                mdl.getAbstractTokens.contains(n.noteType) &&
-                !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
-                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
-            }).foreach(ns.removeNote)
-        }
-
-    private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
-        toks.flatten.filter(!_.isNlp).distinct
-
-    /**
-      * This collapser handles several tasks:
-      * - "overall" collapsing after all other individual collapsers had their turn.
-      * - Special further enrichment of tokens like linking, etc.
-      *
-      * In all cases of overlap (full or partial) - the "longest" note wins. In case of overlap and equal
-      * lengths - the winning note is chosen based on this priority.
-      */
-    @throws[NCE]
-    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
-        def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
-            if (lastPhase)
-                dropAbstract(mdl, ns)
-
-            if (collapseSentence(ns, getNotNlpNotes(ns).map(_.noteType).distinct)) Some(ns) else None
-        }
-
-        // Always deletes `similar` notes.
-        // Some words with the same note type can be detected in various ways.
-        // We keep only one variant - the one with the `best` direct and sparsity parameters;
-        // the other variants for these words are redundant.
-        val redundant: Seq[NCNlpSentenceNote] =
-            this.flatten.filter(!_.isNlp).distinct.
-                groupBy(_.getKey()).
-                map(p ⇒ p._2.sortBy(p ⇒
-                    (
-                        // System notes don't have such flags.
-                        if (p.isUser) {
-                            if (p.isDirect)
-                                0
-                            else
-                                1
-                        }
-                        else
-                            0,
-                        if (p.isUser)
-                            p.sparsity
-                        else
-                            0
-                    )
-                )).
-                flatMap(_.drop(1)).
-                toSeq
-
-        redundant.foreach(this.removeNote)
-
-        var delCombs: Seq[NCNlpSentenceNote] =
-            getNotNlpNotes(this).
-                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ this(i))).filter(_ != note)).
-                distinct
-
-        // Optimization. Deletes all wholly swallowed notes.
-        val links = getLinks(this.flatten)
-
-        val swallowed =
-            delCombs.
-                // No other notes link to it.
-                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
-                // It has no parts of its own.
-                filter(getPartKeys(_).isEmpty).
-                flatMap(note ⇒ {
-                    val noteWordsIdxs = note.wordIndexes.toSet
-                    val key = PartKey(note, this)
-
-                    val delCombOthers =
-                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
-
-                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
-                })
-
-        delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
-        addDeleted(this, swallowed)
-        swallowed.foreach(this.removeNote)
-
-        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
-            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
-                groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
-                toSeq.sortBy(-_.size)
-
-        val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
-
-        var sens =
-            if (delCombs.nonEmpty) {
-                val deleted = mutable.ArrayBuffer.empty[Set[NCNlpSentenceNote]]
-
-                val sens =
-                    (minDelSize to delCombs.size).
-                        flatMap(i ⇒
-                            delCombs.combinations(i).
-                                filter(delComb ⇒
-                                    !toksByIdx.exists(
-                                        rec ⇒
-                                            rec.size - delCombs.size <= 1 &&
-                                            rec.count(note ⇒ !delComb.contains(note)) > 1
-                                    )
-                                )
-                        ).
-                        sortBy(_.size).
-                        map(_.toSet).
-                        flatMap(delComb ⇒
-                            // Already processed with a smaller subset of the same deleted tokens.
-                            if (!deleted.exists(_.subsetOf(delComb))) {
-                                val nsClone = this.clone()
-
-                                // Saves deleted notes for sentence and their tokens.
-                                addDeleted(nsClone, delComb)
-                                delComb.foreach(nsClone.removeNote)
-
-                                // Has overlapped notes for some tokens.
-                                require(!nsClone.exists(_.count(!_.isNlp) > 1))
-
-                                deleted += delComb
-
-                                collapse0(nsClone)
-                            }
-                            else
-                                None
-                        )
-
-                // Removes sentences whose only difference is the 'direct' flag of their user tokens.
-                // `Direct` sentences have higher priority.
-                case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
-                case class Value(sentence: NCNlpSentence, directCount: Int)
-
-                val m = mutable.HashMap.empty[Key, Value]
-
-                sens.map(sen ⇒ {
-                    val notes = sen.flatten
-
-                    val sysNotes = notes.filter(_.isSystem)
-                    val nlpNotes = notes.filter(_.isNlp)
-                    val userNotes = notes.filter(_.isUser)
-
-                    def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
-                        seq.map(p ⇒
-                            // We have to delete some keys to make the sentences comparable.
-                            p.clone().filter(_._1 != "direct")
-                        )
-
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
-                }).
-                    foreach { case (key, sen, directCnt) ⇒
-                        m.get(key) match {
-                            case Some(v) ⇒
-                                // The best sentence is the one with `direct` synonyms.
-                                if (v.directCount > directCnt)
-                                    m += key → Value(sen, directCnt)
-                            case None ⇒ m += key → Value(sen, directCnt)
-                        }
-                    }
-
-                m.values.map(_.sentence).toSeq
-            }
-            else
-                collapse0(this).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
-
-        sens = sens.distinct
-
-        sens.foreach(sen ⇒
-            sen.foreach(tok ⇒
-                tok.size match {
-                    case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
-                    case 2 ⇒ require(tok.head.isNlp ^ tok.last.isNlp, s"Unexpected token notes: $tok")
-                    case _ ⇒ require(requirement = false, s"Unexpected token notes count: $tok")
-                }
-            )
-        )
-
-        // Drops similar sentences (with the same token structure).
-        // Among similar sentences we prefer the one with the minimal free-word count.
-        sens.groupBy(_.flatten.filter(!_.isNlp).map(_.getKey(withIndexes = false))).
-            map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
-            toSeq
-    }
-
     /**
       * Returns a flag indicating whether the given notes are equal (or similar). A difference caused only by stopword tokens can be ignored.
       *
@@ -884,16 +180,6 @@ class NCNlpSentence(
             getUniqueKey0(n1) == getUniqueKey0(n2) && wordsEqualOrSimilar(n1, n2) && referencesEqualOrSimilar(n1, n2)
         }
 
-    override def equals(obj: Any): Boolean = obj match {
-        case x: NCNlpSentence ⇒
-            tokens == x.tokens &&
-                srvReqId == x.srvReqId &&
-                text == x.text &&
-                enabledBuiltInToks == x.enabledBuiltInToks
-
-        case _ ⇒ false
-    }
-
     /**
       *
       */
@@ -904,7 +190,7 @@ class NCNlpSentence(
       *
       * @return
       */
-    def findInitialNlpNote(startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceNote] =
+    def getInitialNlpNote(startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceNote] =
         initNlpNotes.get(NoteKey(startCharIndex, endCharIndex))
 
     /**
@@ -924,11 +210,18 @@ class NCNlpSentence(
       * @param endCharIndex
       * @return
       */
-    def findNlpToken(noteType: String, startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceToken] =
+    def getNlpToken(noteType: String, startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceToken] =
         nlpTokens.get(TokenKey(noteType, startCharIndex, endCharIndex))
 
     /**
       *
       */
     def getDeletedNotes: Predef.Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = deletedNotes.toMap
+
+    /**
+      *
+      * @param deletedNotes Deleted overridden notes with their tokens.
+      */
+    def addDeletedNotes(deletedNotes: Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]]): Unit =
+        this.deletedNotes ++= deletedNotes
 }
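
With addDeleted() moved out of the class, external code now maintains the
deleted-notes bookkeeping through the getDeletedNotes/addDeletedNotes pair above.
A hedged usage sketch; the names sen, note and toks are illustrative only:

    // Snapshot an overridden note together with clones of its tokens...
    sen.addDeletedNotes(Map(note.clone() → toks.map(_.clone())))

    // ...and later read the accumulated snapshots back as an immutable map.
    val deleted: Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = sen.getDeletedNotes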
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index 19308e3..da13b07 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,6 +49,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 import resource.managed
 
 import java.io._
@@ -512,6 +513,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace {
             startedMgrs += NCProbeEnrichmentManager.start(span)
             startedMgrs += NCConnectionManager.start(span)
             startedMgrs += NCDialogFlowManager.start(span)
+            startedMgrs += NCSentenceManager.start(span)
         }
     }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index aefdbd1..bbf7630 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -230,7 +230,7 @@ object NCProbeVariants {
                                         nlpTokOpt match {
                                             case Some(nlpTok) ⇒ mkToken(nlpTok)
                                             case None ⇒
-                                                nlpSen.findInitialNlpNote(key.from, key.to) match {
+                                                nlpSen.getInitialNlpNote(key.from, key.to) match {
                                                     case Some(nlpNote) ⇒
                                                         val artTok = NlpToken(IDX)
 
@@ -262,7 +262,7 @@ object NCProbeVariants {
                             for (tok ← parts)
                                 process(tok,
                                     nlpSen.
-                                        findNlpToken(tok.getId, tok.getStartCharIndex, tok.getEndCharIndex).
+                                        getNlpToken(tok.getId, tok.getStartCharIndex, tok.getEndCharIndex).
                                         getOrElse(throw new NCE(s"Token not found for $tok"))
                                 )
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 368f0c4..c328e57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,6 +43,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl._
 import org.apache.nlpcraft.probe.mgrs.nlp.validate._
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
 
 import java.io.Serializable
@@ -500,7 +501,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
                         s"]")
             }
 
-            nlpSen.clone().collapse(mdl.model, lastPhase = true).
+            NCSentenceManager.collapse(mdl.model, nlpSen.clone(), lastPhase = true).
                 // Sorted to support deterministic logs.
                 sortBy(p ⇒
                 p.map(p ⇒ {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 2a9dec0..0a11314 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -21,9 +21,12 @@ import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
 import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuffer, _}
 import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.impl.NCTokenLogger
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
 import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, NCProbeVariants}
 
 import java.io.Serializable
@@ -355,7 +358,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
 
             var permCnt = 0
-            lazy val collapsedSens = NCProbeVariants.convert(ns.srvReqId, mdl, ns.clone().collapse(mdl.model)).map(_.asScala)
+            lazy val collapsedSens = NCProbeVariants.convert(
+                ns.srvReqId,
+                mdl,
+                NCSentenceManager.collapse(mdl.model, ns.clone())
+            ).map(_.asScala)
 
             /**
               *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceHelper.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceHelper.java
new file mode 100644
index 0000000..1e215ad
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceHelper.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.sentence;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.RecursiveTask;
+
+import static java.util.stream.Collectors.toList;
+
+/**
+ * Not converted to Scala because of performance problems with implicit conversions between Scala and Java long values.
+ */
+class NCSentenceHelper extends RecursiveTask<List<Long>> {
+    private static final long THRESHOLD = (long)Math.pow(2, 20);
+
+    private final long lo;
+    private final long hi;
+    private final long[] wordBits;
+    private final int[] wordCounts;
+
+    private NCSentenceHelper(long lo, long hi, long[] wordBits, int[] wordCounts) {
+        this.lo = lo;
+        this.hi = hi;
+        this.wordBits = wordBits;
+        this.wordCounts = wordCounts;
+    }
+
+    private List<Long> computeLocal() {
+        List<Long> res = new ArrayList<>();
+
+        for (long comboBits = lo; comboBits < hi; comboBits++) {
+            boolean match = true;
+
+            // For each input row we check if subtracting the current combination of words
+            // from the input row would give us the expected result.
+            for (int j = 0; j < wordBits.length; j++) {
+                // Count how many of the row's words are covered by the current combination
+                // and check if more than 1 word would remain after subtracting them.
+                if (wordCounts[j] - Long.bitCount(wordBits[j] & comboBits) > 1) {
+                    // Skip this combination.
+                    match = false;
+
+                    break;
+                }
+            }
+
+            if (match && excludes(comboBits, res))
+                res.add(comboBits);
+        }
+
+        return res;
+    }
+
+    private List<Long> forkJoin() {
+        long mid = lo + hi >>> 1L;
+
+        NCSentenceHelper t1 = new NCSentenceHelper(lo, mid, wordBits, wordCounts);
+        NCSentenceHelper t2 = new NCSentenceHelper(mid, hi, wordBits, wordCounts);
+
+        t2.fork();
+
+        return merge(t1.compute(), t2.join());
+    }
+
+    private static List<Long> merge(List<Long> l1, List<Long> l2) {
+        if (l1.isEmpty())
+            return l2;
+        else if (l2.isEmpty())
+            return l1;
+
+        int size1 = l1.size();
+        int size2 = l2.size();
+
+        if (size1 == 1 && size2 > 1 || size2 == 1 && size1 > 1) {
+            // Minor optimization for the case when one of the lists has only one element.
+            List<Long> res = size1 == 1 ? l2 : l1;
+            Long val = size1 == 1 ? l1.get(0) : l2.get(0);
+
+            if (excludes(val, res))
+                res.add(val);
+
+            return res;
+        }
+
+        List<Long> res = new ArrayList<>(size1 + size2);
+
+        for (int i = 0, max = Math.max(size1, size2); i < max; i++) {
+            Long v1 = i < size1 ? l1.get(i) : null;
+            Long v2 = i < size2 ? l2.get(i) : null;
+
+            if (v1 != null && v2 != null) {
+                if (containsAllBits(v1, v2))
+                    v1 = null;
+                else if (containsAllBits(v2, v1))
+                    v2 = null;
+            }
+
+            if (v1 != null && excludes(v1, res))
+                res.add(v1);
+
+            if (v2 != null && excludes(v2, res))
+                res.add(v2);
+        }
+
+        return res;
+    }
+
+    private static boolean excludes(long bits, List<Long> allBits) {
+        for (Long allBit : allBits)
+            if (containsAllBits(bits, allBit))
+                return false;
+
+        return true;
+    }
+
+    private static boolean containsAllBits(long bitSet1, long bitSet2) {
+        return (bitSet1 & bitSet2) == bitSet2;
+    }
+
+    private static <T> long wordsToBits(Set<T> words, List<T> dict) {
+        long bits = 0;
+
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if (words.contains(dict.get(i)))
+                bits |= 1L << i;
+
+        return bits;
+    }
+
+    private static <T> List<T> bitsToWords(long bits, List<T> dict) {
+        List<T> words = new ArrayList<>(Long.bitCount(bits));
+
+        for (int i = 0, n = dict.size(); i < n; i++)
+            if ((bits & 1L << i) != 0)
+                words.add(dict.get(i));
+
+        return words;
+    }
+
+    @Override
+    protected List<Long> compute() {
+        return hi - lo <= THRESHOLD ? computeLocal() : forkJoin();
+    }
+
+    /**
+     * Finds word combinations whose deletion leaves at most one word in every row.
+     * @param words List of rows, each row being a set of words.
+     * @param pool Fork/join pool to run the search on.
+     * @param <T> Word type.
+     * @return List of found combinations.
+     */
+    static <T> List<List<T>> findCombinations(List<Set<T>> words, ForkJoinPool pool) {
+        assert words != null && !words.isEmpty();
+        assert pool != null;
+
+        if (words.stream().allMatch(p -> p.size() == 1))
+            return Collections.singletonList(Collections.emptyList());
+
+        // Build dictionary of unique words.
+        List<T> dict = words.stream().flatMap(Collection::stream).distinct().collect(toList());
+
+        if (dict.size() > Long.SIZE)
+            // Note: the power set of 64 words yields more combinations than a signed long can hold (9223372036854775807).
+            throw new IllegalArgumentException("Dictionary is too long: " + dict.size());
+
+        // Convert words to bitmasks (each bit corresponds to an index in the dictionary).
+        long[] wordBits =
+            words.stream().sorted(Comparator.comparingInt(Set::size)).mapToLong(row -> wordsToBits(row, dict)).toArray();
+
+        // Cache words count per row.
+        int[] wordCounts = words.stream().sorted(Comparator.comparingInt(Set::size)).mapToInt(Set::size).toArray();
+
+        // Prepare Fork/Join task to iterate over the power set of all combinations.
+        return
+            pool.invoke(new NCSentenceHelper(1, (long)Math.pow(2, dict.size()), wordBits, wordCounts)).
+                stream().map(bits -> bitsToWords(bits, dict)).collect(toList());
+    }
+}
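
The helper enumerates the power set of a dictionary of up to 64 candidate notes,
encoding every input row as a 64-bit mask so that the per-row test in
computeLocal() costs two bitwise operations; this is also why the class stays in
Java, keeping implicit Scala/Java long conversions off the hot path. A toy Scala
sketch of the same encoding, with strings standing in for notes:

    val dict = List("A", "B", "C")              // Bit i corresponds to dict(i).

    def wordsToBits(ws: Set[String]): Long =
        dict.zipWithIndex.foldLeft(0L) {
            case (bits, (w, i)) ⇒ if (ws(w)) bits | (1L << i) else bits
        }

    val row = wordsToBits(Set("A", "C"))        // Binary 101.
    val combo = wordsToBits(Set("C"))           // Binary 100.

    // Words left in the row after deleting the combination's words (here: 1).
    val left = java.lang.Long.bitCount(row) - java.lang.Long.bitCount(row & combo)

    // Mirrors the acceptance test in computeLocal(): a combination survives
    // only if every row keeps at most one word after the subtraction.
    val accepted = left <= 1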
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
similarity index 70%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 113e088..470776c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -15,28 +15,27 @@
  * limitations under the License.
  */
 
-package org.apache.nlpcraft.common.nlp
+package org.apache.nlpcraft.probe.mgrs.sentence
 
-import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common.NCE
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
+import org.apache.nlpcraft.common.{NCE, NCService, U}
 import org.apache.nlpcraft.model.NCModel
 
+import java.io.{Serializable ⇒ JSerializable}
 import java.util
 import java.util.{List ⇒ JList}
-import java.io.{Serializable ⇒ JSerializable}
-import java.util.Collections
-import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
-import scala.collection.{Map, Seq, Set, mutable}
+import scala.collection.JavaConverters.{asScalaBufferConverter, _}
+import scala.collection.{Map, Seq, mutable}
 import scala.language.implicitConversions
 
-object NCNlpSentence extends LazyLogging {
-    implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
-
-    case class NoteKey(start: Int, end: Int)
-    case class TokenKey(id: String, start: Int, end: Int)
-    case class NoteLink(note: String, indexes: Seq[Int])
+/**
+  * Sentence processing manager.
+  */
+object NCSentenceManager extends NCService {
+    @volatile private var pool: java.util.concurrent.ForkJoinPool = _
 
     case class PartKey(id: String, start: Int, end: Int) {
         require(start <= end)
@@ -44,6 +43,7 @@ object NCNlpSentence extends LazyLogging {
         private def in(i: Int): Boolean = i >= start && i <= end
         def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
     }
+
     object PartKey {
         def apply(m: util.HashMap[String, JSerializable]): PartKey = {
             def get[T](name: String): T = m.get(name).asInstanceOf[T]
@@ -55,6 +55,10 @@ object NCNlpSentence extends LazyLogging {
             PartKey(t.noteType, sen(t.tokenFrom).startCharIndex, sen(t.tokenTo).endCharIndex)
     }
 
+    /**
+      *
+      * @param notes
+      */
     private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
         val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
 
@@ -71,7 +75,7 @@ object NCNlpSentence extends LazyLogging {
                 noteLinks ++=
                     (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
                         yield NoteLink(name, idxs.sorted)
-                    )
+                        )
             }
 
             if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
@@ -81,6 +85,10 @@ object NCNlpSentence extends LazyLogging {
         noteLinks
     }
 
+    /**
+      *
+      * @param notes
+      */
     private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
         notes.
             filter(_.isUser).
@@ -460,13 +468,14 @@ object NCNlpSentence extends LazyLogging {
                     }
 
                     if (fixed.forall(_.size == 1))
-                        // Flattens the two-dimensional array to one dimension,
-                        // so it must always be called, unlike the 'fixIndexesReferences' method.
+                    // Flattens the two-dimensional array to one dimension,
+                    // so it must always be called, unlike the 'fixIndexesReferences' method.
                         ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[JSerializable])
                     else
                         ok = false
                 case None ⇒ // No-op.
             }
+
         ok &&
             ns.flatMap(_.getNotes(noteType)).forall(rel ⇒
                 rel.dataOpt[JList[Int]](idxsField) match {
@@ -538,116 +547,57 @@ object NCNlpSentence extends LazyLogging {
 
         res
     }
-}
-
-import org.apache.nlpcraft.common.nlp.NCNlpSentence._
-
-/**
-  * Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
-  * each note is a collection of KV pairs.
-  *
-  * @param srvReqId Server request ID.
-  * @param text Normalized text.
-  * @param enabledBuiltInToks Enabled built-in tokens.
-  * @param tokens Initial buffer.
-  * @param deletedNotes Deleted overridden notes with their tokens.
-  */
-class NCNlpSentence(
-    val srvReqId: String,
-    val text: String,
-    val enabledBuiltInToks: Set[String],
-    override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new mutable.ArrayBuffer[NCNlpSentenceToken](32),
-    private val deletedNotes: mutable.HashMap[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
-    private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
-    private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] = mutable.HashMap.empty
-) extends NCNlpSentenceTokenBuffer(tokens) with JSerializable {
-    @transient
-    private var hash: java.lang.Integer = _
-
-    private def calcHash(): Int =
-        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
-
-    private def addDeleted(sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
-        sen.deletedNotes ++= dels.map(n ⇒ {
-            val savedDelNote = n.clone()
-            val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
-
-            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
-            // Deleted note's tokens should contain only NLP data and deleted notes.
-            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
-                savedDelTok.remove(mainNote)
-
-            savedDelNote → savedDelToks
-        })
-
-    // Deep copy.
-    override def clone(): NCNlpSentence =
-        new NCNlpSentence(
-            srvReqId,
-            text,
-            enabledBuiltInToks,
-            tokens.map(_.clone()),
-            deletedNotes.map(p ⇒ p._1.clone() → p._2.map(_.clone())),
-            initNlpNotes = initNlpNotes
-        )
 
     /**
-      * Utility method that gets set of notes for given note type collected from
-      * tokens in this sentence. Notes are sorted in the same order they appear
-      * in this sentence.
       *
-      * @param noteType Note type.
-      */
-    def getNotes(noteType: String): Seq[NCNlpSentenceNote] = this.flatMap(_.getNotes(noteType)).distinct
-
-    /**
-      * Utility method that removes note with given ID from all tokens in this sentence.
-      * No-op if such note wasn't found.
-      *
-      * @param note Note.
+      * @param mdl
+      * @param ns
       */
-    def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
-
-    //noinspection HashCodeUsesVar
-    override def hashCode(): Int = {
-        if (hash == null)
-            hash = calcHash()
-
-        hash
-    }
-
-    def fixNote(note: NCNlpSentenceNote, kvs: (String, JSerializable)*): Unit = {
-        val fixed = note.clone(kvs: _*)
-
-        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
-            t.remove(note)
-            t.add(fixed)
-        })
-
-        hash = null
-    }
-
     private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
         if (!mdl.getAbstractTokens.isEmpty) {
             val notes = ns.flatten
 
-            val keys = getPartKeys(notes :_*)
+            val keys = getPartKeys(notes: _*)
             val noteLinks = getLinks(notes)
 
             notes.filter(n ⇒ {
                 val noteToks = ns.tokens.filter(_.contains(n))
 
                 mdl.getAbstractTokens.contains(n.noteType) &&
-                !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
-                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
+                    !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
+                    !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
             }).foreach(ns.removeNote)
         }
 
+    /**
+      *
+      * @param toks
+      * @return
+      */
     private def getNotNlpNotes(toks: Seq[NCNlpSentenceToken]): Seq[NCNlpSentenceNote] =
         toks.flatten.filter(!_.isNlp).distinct
 
     /**
+      *
+      * @param thisSen
+      * @param sen
+      * @param dels
+      */
+    private def addDeleted(thisSen: NCNlpSentence, sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
+        sen.addDeletedNotes(dels.map(n ⇒ {
+            val savedDelNote = n.clone()
+            val savedDelToks = n.tokenIndexes.map(idx ⇒ thisSen(idx).clone())
+
+            val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
+
+            // Deleted note's tokens should contain only NLP data and deleted notes.
+            for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+                savedDelTok.remove(mainNote)
+
+            savedDelNote → savedDelToks
+        }).toMap)
+
+    /**
       * This collapser handles several tasks:
       * - "overall" collapsing after all other individual collapsers had their turn.
       * - Special further enrichment of tokens like linking, etc.
@@ -656,7 +606,7 @@ class NCNlpSentence(
       * lengths - the winning note is chosen based on this priority.
       */
     @throws[NCE]
-    def collapse(mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
+    private def collapseSentence(sen: NCNlpSentence, mdl: NCModel, lastPhase: Boolean = false): Seq[NCNlpSentence] = {
         def collapse0(ns: NCNlpSentence): Option[NCNlpSentence] = {
             if (lastPhase)
                 dropAbstract(mdl, ns)
@@ -669,7 +619,7 @@ class NCNlpSentence(
         // We keep only one variant - the one with the `best` direct and sparsity parameters;
         // the other variants for these words are redundant.
         val redundant: Seq[NCNlpSentenceNote] =
-            this.flatten.filter(!_.isNlp).distinct.
+            sen.flatten.filter(!_.isNlp).distinct.
                 groupBy(_.getKey()).
                 map(p ⇒ p._2.sortBy(p ⇒
                     (
@@ -691,15 +641,15 @@ class NCNlpSentence(
                 flatMap(_.drop(1)).
                 toSeq
 
-        redundant.foreach(this.removeNote)
+        redundant.foreach(sen.removeNote)
 
         var delCombs: Seq[NCNlpSentenceNote] =
-            getNotNlpNotes(this).
-                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ this(i))).filter(_ != note)).
+            getNotNlpNotes(sen).
+                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ sen(i))).filter(_ != note)).
                 distinct
 
         // Optimization. Deletes all wholly swallowed notes.
-        val links = getLinks(this.flatten)
+        val links = getLinks(sen.flatten)
 
         val swallowed =
             delCombs.
@@ -709,7 +659,7 @@ class NCNlpSentence(
                 filter(getPartKeys(_).isEmpty).
                 flatMap(note ⇒ {
                     val noteWordsIdxs = note.wordIndexes.toSet
-                    val key = PartKey(note, this)
+                    val key = PartKey(note, sen)
 
                     val delCombOthers =
                         delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
@@ -717,55 +667,33 @@ class NCNlpSentence(
                     if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
                 })
 
-        delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
-        addDeleted(this, swallowed)
-        swallowed.foreach(this.removeNote)
-
-        val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
-            delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
-                groupBy { case (idx, _) ⇒ idx }.
-                map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note } }.
-                toSeq.sortBy(-_.size)
 
-        val minDelSize = if (toksByIdx.isEmpty) 1 else toksByIdx.map(_.size).max - 1
+        delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
+        addDeleted(sen, sen, swallowed)
+        swallowed.foreach(sen.removeNote)
 
         var sens =
             if (delCombs.nonEmpty) {
-                val deleted = mutable.ArrayBuffer.empty[Set[NCNlpSentenceNote]]
+                val toksByIdx =
+                    delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
+                        groupBy { case (idx, _) ⇒ idx }.
+                        map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
+                        toSeq.sortBy(-_.size)
 
                 val sens =
-                    (minDelSize to delCombs.size).
-                        flatMap(i ⇒
-                            delCombs.combinations(i).
-                                filter(delComb ⇒
-                                    !toksByIdx.exists(
-                                        rec ⇒
-                                            rec.size - delCombs.size <= 1 &&
-                                            rec.count(note ⇒ !delComb.contains(note)) > 1
-                                    )
-                                )
-                        ).
-                        sortBy(_.size).
-                        map(_.toSet).
-                        flatMap(delComb ⇒
-                            // Already processed with less subset of same deleted tokens.
-                            if (!deleted.exists(_.subsetOf(delComb))) {
-                                val nsClone = this.clone()
-
-                                // Saves deleted notes for sentence and their tokens.
-                                addDeleted(nsClone, delComb)
-                                delComb.foreach(nsClone.removeNote)
-
-                                // Has overlapped notes for some tokens.
-                                require(!nsClone.exists(_.count(!_.isNlp) > 1))
-
-                                deleted += delComb
-
-                                collapse0(nsClone)
-                            }
-                            else
-                                None
-                        )
+                    NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala).
+                        flatMap(delComb ⇒ {
+                            val nsClone = sen.clone()
+
+                            // Saves deleted notes for sentence and their tokens.
+                            addDeleted(sen, nsClone, delComb)
+                            delComb.foreach(nsClone.removeNote)
+
+                            // Has overlapped notes for some tokens.
+                            require(!nsClone.exists(_.count(!_.isNlp) > 1))
+
+                            collapse0(nsClone)
+                        })
 
                // Removes sentences whose only difference is the 'direct' flag of their user tokens.
                // 'Direct' sentences have higher priority (see the sketch after this hunk).
@@ -802,7 +730,7 @@ class NCNlpSentence(
                 m.values.map(_.sentence).toSeq
             }
             else
-                collapse0(this).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
+                collapse0(sen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)
 
         sens = sens.distinct
 
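
Note on the dedup step above: when two collapsed variants differ only in the
'direct' flag of their tokens, the variant with more direct tokens is the one
kept. A minimal sketch of that selection rule, using stand-in types (`Tok`,
`Sent`) rather than the real NCNlpSentence API:

    case class Tok(id: String, direct: Boolean)
    case class Sent(toks: Seq[Tok])

    // The grouping key ignores the 'direct' flag, so variants that differ
    // only in it land in the same group ...
    def dedupByDirect(sens: Seq[Sent]): Seq[Sent] =
        sens.groupBy(_.toks.map(_.id)).values.map(group ⇒
            // ... and the variant with the most direct tokens wins.
            group.maxBy(_.toks.count(_.direct))
        ).toSeq
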
@@ -823,112 +751,29 @@ class NCNlpSentence(
             toSeq
     }
 
-    /**
-      * Returns whether the given notes are equal or similar. Differences in stopword tokens can be ignored.
-      *
-      * @param n1 First note.
-      * @param n2 Second note.
-      */
-    def notesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-        if (n1.noteType != n2.noteType)
-            false
-        else {
-            val stopIdxs = this.filter(_.isStopWord).map(_.index)
-
-            // One possible difference - stopwords indexes.
-            def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
-                val set1 = n1.wordIndexes.toSet
-                val set2 = n2.wordIndexes.toSet
-
-                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(stopIdxs.contains)
-            }
-
-            def wordsEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-                wordsEqualOrSimilar0(n1, n2) || wordsEqualOrSimilar0(n2, n1)
-
-            def tokensEqualOrSimilar0(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
-                set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(_.isStopWord)
-
-            def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
-                tokensEqualOrSimilar0(set1, set2) || tokensEqualOrSimilar0(set2, set1)
-
-            def getList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
-                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[Int]].asScala.
-                    map(this (_)).toSet
+    override def start(parent: Span): NCService = {
+        ackStarting()
 
-            def getListList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
-                n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[JList[JList[Int]]].asScala.
-                    flatMap(_.asScala.map(this (_))).toSet
+        pool = new java.util.concurrent.ForkJoinPool()
 
-            def referencesEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
-                require(n1.noteType == n2.noteType)
-
-                n1.noteType match {
-                    case "nlpcraft:sort" ⇒
-                        tokensEqualOrSimilar(getListList(n1, "subjindexes"), getListList(n2, "subjindexes")) &&
-                            tokensEqualOrSimilar(getListList(n1, "byindexes"), getListList(n2, "byindexes"))
-                    case "nlpcraft:limit" ⇒
-                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
-                    case "nlpcraft:reference" ⇒
-                        tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes"))
-
-                    case _ ⇒ true
-                }
-            }
-
-            def referencesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
-                referencesEqualOrSimilar0(n1, n2) || referencesEqualOrSimilar0(n2, n1)
-
-            def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = n.getKey(withIndexes = false, withReferences = false)
-
-            getUniqueKey0(n1) == getUniqueKey0(n2) && wordsEqualOrSimilar(n1, n2) && referencesEqualOrSimilar(n1, n2)
-        }
-
-    override def equals(obj: Any): Boolean = obj match {
-        case x: NCNlpSentence ⇒
-            tokens == x.tokens &&
-                srvReqId == x.srvReqId &&
-                text == x.text &&
-                enabledBuiltInToks == x.enabledBuiltInToks
-
-        case _ ⇒ false
+        ackStarted()
     }
 
-    /**
-      *
-      */
-    def saveNlpNotes(): Unit =
-        initNlpNotes = this.map(t ⇒ NoteKey(t.startCharIndex, t.endCharIndex) → t.getNlpNote).toMap
+    override def stop(parent: Span): Unit = {
+        ackStopping()
 
-    /**
-      *
-      * @return
-      */
-    def findInitialNlpNote(startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceNote] =
-        initNlpNotes.get(NoteKey(startCharIndex, endCharIndex))
-
-    /**
-      *
-      * @param nlp
-      */
-    def addNlpToken(nlp: NCNlpSentenceToken): Unit = {
-        require(nlp.size <= 2)
+        U.shutdownPool(pool)
 
-        nlp.foreach(n ⇒ nlpTokens += TokenKey(n.noteType, nlp.startCharIndex, nlp.endCharIndex) → nlp)
+        ackStopped()
     }
 
     /**
       *
-      * @param noteType
-      * @param startCharIndex
-      * @param endCharIndex
+      * @param mdl Data model.
+      * @param sen Sentence to collapse.
+      * @param lastPhase Whether this is the last collapsing phase (abstract tokens are dropped only then).
       * @return
       */
-    def findNlpToken(noteType: String, startCharIndex: Int, endCharIndex: Int): Option[NCNlpSentenceToken] =
-        nlpTokens.get(TokenKey(noteType, startCharIndex, endCharIndex))
-
-    /**
-      *
-      */
-    def getDeletedNotes: Predef.Map[NCNlpSentenceNote, Seq[NCNlpSentenceToken]] = deletedNotes.toMap
+    def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean = false): Seq[NCNlpSentence] =
+        collapseSentence(sen, mdl, lastPhase)
 }
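
Note on the core change above: the old code enumerated `delCombs.combinations(i)`
for every size `i` and then discarded supersets of already-processed deletion
sets; the new code delegates to NCSentenceHelper.findCombinations, which builds
only the minimal deletion sets directly from the per-word-index groups of
overlapping notes, and takes the ForkJoinPool created in start() (presumably to
parallelize the search). A rough sketch of the idea, with illustrative names
rather than the helper's actual Java API:

    object CombinationsSketch {
        // For each word index with N overlapping notes, a valid variant keeps
        // one note and deletes the other N - 1; full combinations are unions
        // of these per-index choices.
        def findCombinations[T](groups: Seq[Set[T]]): Seq[Set[T]] = {
            val raw = groups.foldLeft(Seq(Set.empty[T]))((acc, grp) ⇒
                if (grp.size <= 1) acc
                else for (del ← acc; keep ← grp.toSeq) yield del ++ (grp - keep)
            )

            // Keep only minimal sets: any proper superset of another candidate
            // is redundant, which replaces the old `deleted` subset bookkeeping.
            raw.distinct.filter(c ⇒ !raw.exists(o ⇒ o != c && o.subsetOf(c)))
        }
    }

For example, findCombinations(Seq(Set("a", "b"), Set("b", "c"))) yields
Set("b") and Set("a", "c"): deleting just "b" resolves both overlaps, as does
deleting "a" and "c" together.
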
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
new file mode 100644
index 0000000..b354533
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch, NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+
+/**
+  * Nested Elements test model.
+  */
+class NCNestedTestModel4 extends NCModelAdapter(
+    "nlpcraft.nested3.test.mdl", "Nested Data Test Model", "1.0"
+) {
+    override def getElements: util.Set[NCElement] =
+        Set(
+            NCTestElement("e1", "//[a-zA-Z0-9]+//"),
+            NCTestElement("e2", "the ^^(id == 'e1')^^")
+        )
+
+    override def getAbstractTokens: util.Set[String] = Set("e1").asJava
+    override def getEnabledBuiltInTokens: util.Set[String] = Set.empty[String].asJava
+
+    @NCIntent("intent=onE2 term(t1)={id == 'e2'}[8, 100]")
+    def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+}
+
+/**
+  * Performance guard: collapsing the heavily overlapping variants shouldn't be too slow.
+  */
+@NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
+class NCEnricherNestedModelSpec4 extends NCTestContext {
+    @Test
+    def test(): Unit = checkIntent("the a " * 8, "onE2")
+}
\ No newline at end of file
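
A back-of-envelope note on why this test guards the fix: in "the a " * 8 every
token matches the e1 regex and every "the a" pair can additionally match e2, so
the sentence accumulates dozens of overlapping notes. Enumerating all deletion
subsets of n such notes, as the old size-by-size scan could in the worst case,
costs on the order of 2^n candidate sentences before filtering (illustrative
arithmetic, not measured from the codebase):

    // 16 deletable notes - e.g. two overlapping notes per "the a" pair over
    // 8 pairs - already mean tens of thousands of candidates.
    val n = 16
    println(BigInt(2).pow(n)) // 65536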