Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/28 09:55:31 UTC

[incubator-nlpcraft] branch NLPCRAFT-456 created (now 3000427)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-456
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 3000427  Code cleanup.

This branch includes the following new commits:

     new 3000427  Code cleanup.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[incubator-nlpcraft] 01/01: Code cleanup.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-456
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 300042735860e626d90002b30b4d5b072892b73a
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Tue Sep 28 12:55:21 2021 +0300

    Code cleanup.
---
 .../nlpcraft/probe/mgrs/NCProbeIdlToken.scala      |  41 +++++-
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   3 +
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 104 +++++++-------
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  25 ++--
 .../probe/mgrs/synonyms/NCSynonymsManager.scala    | 150 ++++++++++++---------
 6 files changed, 196 insertions(+), 129 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
index 5da9808..d4fc27c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeIdlToken.scala
@@ -26,11 +26,46 @@ import org.apache.nlpcraft.model.{NCToken, _}
   * @param word
   */
 case class NCProbeIdlToken(token: NCToken, word: NCNlpSentenceToken) {
-    val (origText: String, wordIndexes: Set[Int], minIndex: Int, maxIndex: Int, isToken: Boolean, isWord: Boolean) =
+    require(token != null ^ word != null)
+
+    val (
+        origText: String,
+        normText: String,
+        stem: String,
+        wordIndexes: Set[Int],
+        minIndex: Int,
+        maxIndex: Int,
+        startCharIndex: Int,
+        endCharIndex: Int,
+        isToken: Boolean,
+        isWord: Boolean
+        ) =
         if (token != null)
-            (token.origText, token.wordIndexes.toSet, token.wordIndexes.head, token.wordIndexes.last, true, false)
+            (
+                token.origText,
+                token.normText,
+                token.stem,
+                token.wordIndexes.toSet,
+                token.wordIndexes.head,
+                token.wordIndexes.last,
+                token.getStartCharIndex,
+                token.getEndCharIndex,
+                true,
+                false
+            )
         else
-            (word.origText, word.wordIndexes.toSet, word.wordIndexes.head, word.wordIndexes.last, false, true)
+            (
+                word.origText,
+                word.normText,
+                word.stem,
+                word.wordIndexes.toSet,
+                word.wordIndexes.head,
+                word.wordIndexes.last,
+                word.startCharIndex,
+                word.endCharIndex,
+                false,
+                true
+            )
 
     private lazy val hash = if (isToken) Seq(wordIndexes, token.getId).hashCode() else wordIndexes.hashCode()
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index ea41793..6b6a8e8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -62,6 +62,9 @@ case class NCProbeModel(
     lazy val hasNoIdlSynonyms: Boolean = continuousSynonyms.nonEmpty || sparseSynonyms.nonEmpty
     lazy val hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(_.sparse))
     lazy val hasContinuousSynonyms: Boolean = continuousSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(!_.sparse))
+    lazy val isComplex: Boolean = hasIdlSynonyms || !model.getParsers.isEmpty
 
     def hasIdlSynonyms(elemId: String): Boolean = idlSynonyms.contains(elemId)
+
+
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index fde865f..560ddff 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -492,7 +492,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
                 }).toMap
 
                // The loop makes sense only if the model is complex (has user-defined parsers or IDL-based synonyms).
-                continue = NCModelEnricher.isComplex(mdl) && res.exists { case (_, same) => !same }
+                continue = mdl.isComplex && res.exists { case (_, same) => !same }
 
                 if (DEEP_DEBUG)
                     if (continue) {
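
The flag above drives a fixed-point loop: enrichment passes are repeated while a
complex model keeps changing the sentence. A minimal sketch of the idea, with
hypothetical names (enrichOnce stands in for a single enrichment pass):

    // Repeats enrichment until no enricher reports a change; only complex
    // models (user parsers or IDL synonyms) can benefit from another pass.
    def enrichLoop(isComplex: Boolean, enrichOnce: () => Map[String, Boolean]): Unit = {
        var continue = true

        while (continue) {
            val res = enrichOnce() // Hypothetical: enricher name -> "nothing changed" flag.
            continue = isComplex && res.exists { case (_, same) => !same }
        }
    }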
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 7196985..a39edfd 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -83,8 +83,6 @@ object NCModelEnricher extends NCProbeEnricher {
         ackStopped()
     }
 
-    def isComplex(mdl: NCProbeModel): Boolean = mdl.hasIdlSynonyms || !mdl.model.getParsers.isEmpty
-
     /**
       *
       * @param ns
@@ -180,7 +178,8 @@ object NCModelEnricher extends NCProbeEnricher {
                         new NCCustomElement() {
                             override def getElementId: String = noteId
                             override def getWords: JList[NCCustomWord] = words
-                            override def getMetadata: JavaMeta = md.map(p => p._1 -> p._2.asInstanceOf[AnyRef]).asJava
+                            override def getMetadata: JavaMeta =
+                                md.map { case (k, v) => k -> v.asInstanceOf[AnyRef] }.asJava
                         }
                     }).asJava
                 )
@@ -228,7 +227,7 @@ object NCModelEnricher extends NCProbeEnricher {
      *  Example: Piece: 'x1, x2(stopword), x3(stopword), x4' will be expanded into
       *  {'x1, x2, x3, x4', 'x1, x2, x4', 'x1, x3, x4', 'x1, x4'}
       *
-      *  3. All variants collected, duplicated deleted, etc.
+      *  3. All variants are collected, duplicate sets are deleted, etc.
       *
       * @param toks
       */
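
The expansion described in the comment above can be sketched as follows; this is
a minimal standalone sketch with hypothetical names (Tok, expand), not the
actual NCModelEnricher code:

    case class Tok(text: String, isStop: Boolean)

    // Every subset of the stopwords strictly inside a piece may be dropped;
    // the first and last tokens are always kept.
    def expand(piece: Seq[Tok]): Seq[Seq[Tok]] = {
        val inner = piece.drop(1).dropRight(1).filter(_.isStop)

        (0 to inner.size).
            flatMap(inner.combinations).
            map(del => piece.filterNot(del.contains)).
            distinct
    }

    // expand(Seq(Tok("x1", false), Tok("x2", true), Tok("x3", true), Tok("x4", false)))
    // yields {x1 x2 x3 x4}, {x1 x3 x4}, {x1 x2 x4}, {x1 x4}, as in the comment above.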
@@ -244,7 +243,7 @@ object NCModelEnricher extends NCProbeEnricher {
                 else
                     slides += mutable.ArrayBuffer.empty :+ stop
 
-            // Too many stopords inside skipped.
+            // Slides with too many stopwords inside are skipped.
             val bigSlides = slides.filter(_.size > 2)
 
             var stops4Delete: Seq[Seq[NlpToken]] =
@@ -255,7 +254,7 @@ object NCModelEnricher extends NCProbeEnricher {
                     if (stops4AllCombs.nonEmpty)
                         for (
                             seq1 <- Range.inclusive(0, stops4AllCombs.size).flatMap(stops4AllCombs.combinations);
-                            seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
+                                seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
                         )
                         yield seq1 ++ seq2.flatten
                     else
@@ -268,11 +267,10 @@ object NCModelEnricher extends NCProbeEnricher {
             stops4Delete = stops4Delete.filter(seq => !seq.contains(combo.head) && !seq.contains(combo.last))
 
             (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo).distinct
-
         }).
-            filter(_._1.nonEmpty).
-            groupBy(_._1).
-            map(p => p._1 -> p._2.map(_._2).minBy(p => (-p.size, p.head.index))).
+            filter { case (seq, _) => seq.nonEmpty }.
+            groupBy { case (seq, _) => seq }.
+            map { case (toksKey, seq) => toksKey -> seq.map(_._2).minBy(p => (-p.size, p.head.index)) }.
             sortBy { case(data, combo) => (-combo.size, -data.size, combo.head.index, data.head.index) }
 
     /**
@@ -297,15 +295,17 @@ object NCModelEnricher extends NCProbeEnricher {
 
     /**
       *
-      * @param tows
+      * @param idlToks
       * @param ns
       */
-    private def toTokens(tows: Seq[IdlToken], ns: Sentence): Seq[NlpToken] =
-        (
-            tows.filter(_.isWord).map(_.word) ++
-                tows.filter(_.isToken).map(_.token).
-                    flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
-        ).sortBy(_.startCharIndex)
+    private def toNlpTokens(idlToks: Seq[IdlToken], ns: Sentence): Seq[NlpToken] = {
+        val words = idlToks.filter(_.isWord).map(_.word)
+        val suitableToks =
+            idlToks.filter(_.isToken).map(_.token).
+                flatMap(w => ns.filter(t => t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
+
+        (words ++ suitableToks).sortBy(_.startCharIndex)
+    }
 
     /**
       *
@@ -378,6 +378,7 @@ object NCModelEnricher extends NCProbeEnricher {
     }
 
     /**
+      * Prepares IDL tokens based on NLP tokens.
       *
       * @param h
       * @param toks
@@ -391,9 +392,7 @@ object NCModelEnricher extends NCProbeEnricher {
 
                // Drops sequences without tokens (the IDL part works with tokens only).
                 if (rec.nonEmpty)
-                    Some(rec ++
-                        (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens)
-                    )
+                    Some(rec ++ (seq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).map(h.tokens))
                 else
                     None
             }).seq
@@ -440,11 +439,11 @@ object NCModelEnricher extends NCProbeEnricher {
                     for (
                        // 'toksExt' is a piece of the sentence; 'toks' is the same as 'toksExt', possibly with some stopwords removed.
                         (toks, toksExt) <- combosTokens(ns.toSeq);
-                        idxs = toks.map(_.index);
-                        e <- mdl.elements.values;
-                        elemId = e.getId;
-                        greedy = e.isGreedy.orElse(mdl.model.isGreedy)
-                        if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
+                            idxs = toks.map(_.index);
+                            e <- mdl.elements.values;
+                            elemId = e.getId;
+                            greedy = e.isGreedy.orElse(mdl.model.isGreedy)
+                            if !greedy || !alreadyMarked(ns, elemId, toks, idxs)
                     ) {
                         def add(
                             dbgType: String,
@@ -456,7 +455,7 @@ object NCModelEnricher extends NCProbeEnricher {
 
                             val ok =
                                 (!greedy || !alreadyMarked(ns, elemId, elemToks, idxs)) &&
-                                 ( parts.isEmpty || !parts.exists { case (t, _) => t.getId == elemId })
+                                    ( parts.isEmpty || !parts.exists { case (tok, _) => tok.getId == elemId })
 
                             if (ok)
                                 mark(
@@ -563,7 +562,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     )
                             }
                             else
-                                // 2.2 Sparse.
+                            // 2.2 Sparse.
                                 for (syn <- allSyns; comb <- allCombs)
                                     NCSynonymsManager.onSparseMatch(
                                         ns.srvReqId,
@@ -573,7 +572,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                         req,
                                         variantsToks,
                                         res => {
-                                            val toks = getSparsedTokens(toTokens(res, ns), toTokens(comb, ns))
+                                            val toks = getSparsedTokens(toNlpTokens(res, ns), toNlpTokens(comb, ns))
                                             val parts = toParts(mdl, ns.srvReqId, res, syn)
                                             val typ = if (syn.sparse) "IDL sparse" else "IDL continuous"
 
@@ -607,6 +606,9 @@ object NCModelEnricher extends NCProbeEnricher {
       * @param ns
       */
     private def normalize(ns: Sentence): Unit = {
+        // Finds and removes user notes if the sentence contains notes with a similar structure but fewer swallowed stop-words.
+        // Such stop-words can be used for detecting other user tokens and are harmless if they remain free words.
+        // Notes with links and notes referenced by them are not touched.
         val usrNotes = ns.flatten.filter(_.isUser).distinct
         val links = NCSentenceManager.getLinks(usrNotes)
         val parts = NCSentenceManager.getPartKeys(usrNotes)
@@ -638,28 +640,34 @@ object NCModelEnricher extends NCProbeEnricher {
     // TODO: simplify, add tests, check model properties (sparse etc) for optimization.
     /**
       *
-      * @param elmId
-      * @param toks
-      * @param sliceToksIdxsSorted
+      * @param elmId Element ID.
+      * @param toks Tokens.
+      * @param idxs Indexes; note that they may not be exactly the tokens' indexes (sparse case).
       */
-    private def alreadyMarked(ns: Sentence, elmId: String, toks: Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = {
+    private def alreadyMarked(ns: Sentence, elmId: String, toks: Seq[NlpToken], idxs: Seq[Int]): Boolean = {
         lazy val toksIdxsSorted = toks.map(_.index).sorted
 
-        sliceToksIdxsSorted.map(ns).forall(_.exists(n => n.noteType == elmId && n.sparsity == 0)) ||
-        toks.exists(_.exists(n =>
-            n.noteType == elmId &&
-            (
-                (n.sparsity == 0 &&
-                    (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
-                )
-                    ||
-                (
-                    n.tokenIndexes == toksIdxsSorted ||
-                    n.tokenIndexes.containsSlice(toksIdxsSorted) &&
-                    U.isContinuous(toksIdxsSorted) &&
-                    U.isContinuous(n.tokenIndexes)
-                )
-            )
-        ))
+        // All tokens with the given indexes are found with zero sparsity.
+        val ok1 = idxs.map(ns).forall(_.exists(n => n.noteType == elmId && n.sparsity == 0))
+
+        lazy val ok2 =
+            toks.exists(_.exists(n =>
+                if (n.noteType == elmId) {
+                    val noteOk1 = n.sparsity == 0 &&
+                        (idxs.containsSlice(n.tokenIndexes) || n.tokenIndexes.containsSlice(toksIdxsSorted))
+
+                    lazy val noteOk2 =
+                        n.tokenIndexes == toksIdxsSorted ||
+                            n.tokenIndexes.containsSlice(toksIdxsSorted) &&
+                                U.isContinuous(toksIdxsSorted) &&
+                                U.isContinuous(n.tokenIndexes)
+
+                    noteOk1 || noteOk2
+                }
+                else
+                    false
+            ))
+
+        ok1 || ok2
     }
 }
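
The two overlap conditions in alreadyMarked above can be illustrated with toy
predicates; this is a simplified sketch (hypothetical names, note and sparsity
bookkeeping omitted):

    def isContinuous(idxs: Seq[Int]): Boolean =
        idxs.sliding(2).forall {
            case Seq(a, b) => b == a + 1
            case _ => true // Window of size 1 (single-element sequence).
        }

    // Mirrors `noteOk1` (zero sparsity): the note's indexes contain, or are
    // contained by, the candidate span as a slice.
    def continuousCover(noteIdxs: Seq[Int], candIdxs: Seq[Int]): Boolean =
        candIdxs.containsSlice(noteIdxs) || noteIdxs.containsSlice(candIdxs)

    // Mirrors `noteOk2`: exact match, or a continuous candidate lying inside
    // a continuous note.
    def sliceCover(noteIdxs: Seq[Int], candIdxs: Seq[Int]): Boolean =
        noteIdxs == candIdxs ||
            noteIdxs.containsSlice(candIdxs) &&
            isContinuous(candIdxs) &&
            isContinuous(noteIdxs)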
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 00d6bdf..f9f7a01 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -369,7 +369,7 @@ object NCSentenceManager extends NCService {
         val t = NCNlpSentenceToken(idx)
 
         // Note, it adds stop-words too.
-        val content = nsCopyToks.zipWithIndex.filter(p => indexes.contains(p._2)).map(_._1)
+        val content = nsCopyToks.zipWithIndex.filter { case (_, idx) => indexes.contains(idx) }.map { case (tok, _) => tok }
 
         content.foreach(t => history += t.index -> idx)
 
@@ -378,15 +378,12 @@ object NCSentenceManager extends NCService {
 
             val n = content.size - 1
 
-            content.zipWithIndex.foreach(p => {
-                val t = p._1
-                val idx = p._2
-
+            content.zipWithIndex.foreach { case (t, idx) =>
                 buf += get(t)
 
                 if (idx < n && t.endCharIndex != content(idx + 1).startCharIndex)
                     buf += " "
-            })
+            }
 
             buf.mkString
         }
@@ -459,8 +456,7 @@ object NCSentenceManager extends NCService {
         for (tok <- ns.filter(_.isTypeOf(noteType)) if ok)
             tok.getNoteOpt(noteType, idxsField) match {
                 case Some(n) =>
-                    val idxs: Seq[Seq[Int]] =
-                        n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq
+                    val idxs: Seq[Seq[Int]] = n.data[JList[JList[Int]]](idxsField).asScala.map(_.asScala.toSeq).toSeq
                     var fixed = idxs
 
                     history.foreach {
@@ -539,8 +535,7 @@ object NCSentenceManager extends NCService {
             // Validation (all indexes calculated well)
             require(
                 !res ||
-                    !ns.flatten.
-                        exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))),
+                !ns.flatten.exists(n => ns.filter(_.wordIndexes.exists(n.wordIndexes.contains)).exists(t => !t.contains(n))),
                 s"Invalid sentence:\n" +
                     ns.map(t =>
                         // Human readable invalid sentence for debugging.
@@ -745,9 +740,11 @@ object NCSentenceManager extends NCService {
             )
         )
 
+        // The code below contains optimizations: variants that are similar by certain criteria are deleted.
+
         def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] = s.flatten.filter(!_.isNlp)
 
-        // Drops similar sentences (with same notes structure). Keeps with more found.
+        // Drops similar sentences (same notes structure) based on greedy elements. Keeps the ones with more notes found.
         val notGreedyElems =
             mdl.getElements.asScala.flatMap(e => if (!e.isGreedy.orElse(mdl.isGreedy)) Some(e.getId) else None).toSet
 
@@ -768,6 +765,7 @@ object NCSentenceManager extends NCService {
 
         var sensWithNotesIdxs = sensWithNotes.zipWithIndex
 
+        // Drops similar sentences if there are other sentences with a superset of their notes.
         sens =
             sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
                 !sensWithNotesIdxs.
@@ -775,13 +773,12 @@ object NCSentenceManager extends NCService {
                     exists { case((_, notNlpNotes2), _) => notNlpNotes1.subsetOf(notNlpNotes2) }
             }.map { case ((sen, _), _) => sen }
 
-        // Drops similar sentences (with same tokens structure).
-        // Among similar sentences we prefer one with minimal free words count.
+        // Drops similar sentences. Among similar sentences the one with the minimal free word count is preferred.
         sens = sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
             map { case (_, seq) => seq.minBy(_.filter(p => p.isNlp && !p.isStopWord).map(_.wordIndexes.length).sum) }.
             toSeq
 
-        // Drops sentences if they are just subset of another.
+        // Drops sentences if they are just a subset of another one (indexes are ignored here).
         sensWithNotes = sensWithNotes.filter { case (sen, _) => sens.contains(sen) }
 
         sensWithNotesIdxs = sensWithNotes.zipWithIndex
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index 80f9c19..fa31f26 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -38,7 +38,7 @@ object NCSynonymsManager extends NCService {
         private lazy val cache =
             mutable.HashMap.empty[String, mutable.HashMap[Int, mutable.HashMap[Seq[T], mutable.HashSet[Synonym]]]]
 
-        def isUnprocessed(elemId: String, s: Synonym, tokens: Seq[T]): Boolean =
+        def isUnprocessed(elemId: String, syn: Synonym, tokens: Seq[T]): Boolean =
             cache.
                 getOrElseUpdate(
                     elemId,
@@ -51,7 +51,7 @@ object NCSynonymsManager extends NCService {
                 getOrElseUpdate(
                     tokens,
                     mutable.HashSet.empty[Synonym]
-                ).add(s)
+                ).add(syn)
     }
 
     private case class SavedIdlKey(id: String, startCharIndex: Int, endCharIndex: Int, other: Map[String, AnyRef] = Map.empty)
@@ -72,13 +72,11 @@ object NCSynonymsManager extends NCService {
                 )
     }
 
-    private case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction) {
-        override def toString: String = variants.toString()
-    }
+    private case class SavedIdlValue(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction)
 
     private case class IdlChunkKey(token: IdlToken, chunk: NCProbeSynonymChunk)
 
-    private val savedIdl = mutable.HashMap.empty[String, mutable.HashMap[SavedIdlKey, mutable.ArrayBuffer[Value]]]
+    private val savedIdl = mutable.HashMap.empty[String, mutable.HashMap[SavedIdlKey, mutable.ArrayBuffer[SavedIdlValue]]]
     private val idlChunksCache = mutable.HashMap.empty[String, mutable.HashMap[IdlChunkKey, Boolean]]
     private val idlCaches = mutable.HashMap.empty[String, CacheHolder[IdlToken]]
     private val tokCaches = mutable.HashMap.empty[String, CacheHolder[Int]]
@@ -120,7 +118,7 @@ object NCSynonymsManager extends NCService {
 
     /**
       *
-      * @param s
+      * @param syn
       * @param toks
       * @param isMatch
       * @param getIndex
@@ -128,19 +126,23 @@ object NCSynonymsManager extends NCService {
       * @tparam T
       */
     private def sparseMatch0[T](
-        s: Synonym,
+        syn: Synonym,
         toks: Seq[T],
         isMatch: (T, NCProbeSynonymChunk) => Boolean,
         getIndex: T => Int,
         shouldBeNeighbors: Boolean
     ): Option[Seq[T]] =
-        if (toks.size >= s.size) {
+        if (toks.size >= syn.size) {
             lazy val res = mutable.ArrayBuffer.empty[T]
             lazy val all = mutable.HashSet.empty[T]
 
+            // There are 3 states:
+            // 0 - initial working state, first step.
+            // 1 - working state, not first step.
+            // -1 - stop state.
             var state = 0
 
-            for (chunk <- s if state != -1) {
+            for (chunk <- syn if state != -1) {
                 val seq =
                     if (state == 0) {
                         state = 1
@@ -153,12 +155,12 @@ object NCSynonymsManager extends NCService {
                 if (seq.nonEmpty) {
                     val head = seq.head
 
-                    if (!s.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+                    if (!syn.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
                         state = -1
                     else {
                         all ++= seq
 
-                        if (all.size > s.size)
+                        if (all.size > syn.size)
                             state = -1
                         else
                             res += head
@@ -168,7 +170,12 @@ object NCSynonymsManager extends NCService {
                     state = -1
             }
 
-            if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
+            if (
+                state != -1 && // State ok.
+                all.size == res.size && // No excess tokens were processed.
+                // `neighbors` condition, important for simple (non-sparse) synonyms.
+                (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted))
+            )
                 Some(res.toSeq)
             else
                 None
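
The loop above is a small state machine over the synonym's chunks. The same idea
in isolation, as a minimal sketch with hypothetical names and signatures:

    import scala.collection.mutable

    // For each chunk, greedily takes the first unused matching token. Without
    // `permute`, each picked token must extend the match left to right.
    def sparseMatch[T](
        chunks: Seq[String],
        toks: Seq[T],
        matches: (T, String) => Boolean,
        index: T => Int,
        permute: Boolean
    ): Option[Seq[T]] = {
        val res = mutable.ArrayBuffer.empty[T]
        var failed = false

        for (chunk <- chunks if !failed)
            toks.find(t => matches(t, chunk) && !res.contains(t)) match {
                case Some(h) if permute || res.isEmpty || index(h) > index(res.last) => res += h
                case _ => failed = true
            }

        if (!failed) Some(res.toSeq) else None
    }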
@@ -186,69 +193,75 @@ object NCSynonymsManager extends NCService {
     private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit = {
         savedIdl.
             getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty).
-            getOrElseUpdate(SavedIdlKey(tok), mutable.ArrayBuffer.empty) +=
-                Value(req, variantsToks, pred)
+                getOrElseUpdate(SavedIdlKey(tok), mutable.ArrayBuffer.empty) +=
+                    SavedIdlValue(req, variantsToks, pred)
     }
 
     /**
+      * Checks that the given synonym has not been checked yet against the given NLP token indexes.
       *
       * @param srvReqId
       * @param elemId
-      * @param s
+      * @param syn
       * @param tokens
       */
-    private def isUnprocessedTokens(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[Int]): Boolean =
-        tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, s, tokens)
+    private def isUnprocessedTokens(srvReqId: String, elemId: String, syn: Synonym, tokens: Seq[Int]): Boolean =
+        tokCaches.getOrElseUpdate(srvReqId, new CacheHolder[Int]).isUnprocessed(elemId, syn, tokens)
 
     /**
+      * Checks that the given synonym has not been checked yet against the given IDL tokens.
       *
       * @param srvReqId
       * @param elemId
-      * @param s
+      * @param syn
       * @param tokens
       */
-    private def isUnprocessedIdl(srvReqId: String, elemId: String, s: Synonym, tokens: Seq[IdlToken]): Boolean =
-        idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[IdlToken]).isUnprocessed(elemId, s, tokens)
+    private def isUnprocessedIdl(srvReqId: String, elemId: String, syn: Synonym, tokens: Seq[IdlToken]): Boolean =
+        idlCaches.getOrElseUpdate(srvReqId, new CacheHolder[IdlToken]).isUnprocessed(elemId, syn, tokens)
 
     /**
+      * Checks whether the given IDL token matches the given synonym chunk.
       *
-      * @param tow
-      * @param chunk
-      * @param req
-      * @param variantsToks
+      * @param t IDL token.
+      * @param chunk Synonym's chunk.
+      * @param req Request.
+      * @param variantsToks All possible variants of the request.
       */
     private def isMatch(
-        tow: IdlToken, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
+        t: IdlToken, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
     ): Boolean =
         idlChunksCache.
-            getOrElseUpdate(req.getServerRequestId,
+            getOrElseUpdate(
+                req.getServerRequestId,
                 mutable.HashMap.empty[IdlChunkKey, Boolean]
             ).
             getOrElseUpdate(
-                IdlChunkKey(tow, chunk),
+                IdlChunkKey(t, chunk),
                 {
-                    def get0[T](fromToken: NCToken => T, fromWord: NlpToken => T): T =
-                        if (tow.isToken) fromToken(tow.token) else fromWord(tow.word)
-
                     chunk.kind match {
-                        case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+                        case TEXT => chunk.wordStem == t.stem
 
                         case REGEX =>
-                            chunk.regex.matcher(get0(_.origText, _.origText)).matches() ||
-                            chunk.regex.matcher(get0(_.normText, _.normText)).matches()
+                            chunk.regex.matcher(t.origText).matches() || chunk.regex.matcher(t.normText).matches()
 
                         case IDL =>
-                            val ok =
+                            val ok = {
+                                // The IDL condition applies to tokens only.
+                                t.isToken &&
+                                // At least one suitable variant (valid NCIdlContext) should be found for the given token.
+                                // This variant will be checked again in the last processing phase.
                                 variantsToks.par.exists(vrntToks =>
-                                    get0(t =>
-                                        chunk.idlPred.apply(t, NCIdlContext(toks = vrntToks, req = req)).
-                                            value.asInstanceOf[Boolean],
-                                        _ => false
+                                    chunk.idlPred.apply(
+                                        t.token,
+                                        NCIdlContext(toks = vrntToks, req = req)).value.asInstanceOf[Boolean]
                                     )
-                                )
+                            }
 
+                            // Saves all variants for later validation.
+                            // All suitable variants can be deleted later, so this positive result can be revoked
+                            // in the last processing phase.
                             if (ok)
-                                save(req, tow.token, chunk.idlPred, variantsToks)
+                                save(req, t.token, chunk.idlPred, variantsToks)
 
                             ok
 
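
The isUnprocessedTokens/isUnprocessedIdl checks above boil down to a per-request
"seen" set. A flattened toy version (the real CacheHolder nests hash maps from
element down to token sequence and synonym set):

    import scala.collection.mutable

    // S is the synonym type, T the token key type (Int indexes or IDL tokens).
    class DedupCache[S, T] {
        private val seen = mutable.HashSet.empty[(String, S, Seq[T])]

        // True only the first time this (element, synonym, tokens) triple is
        // seen, so the same synonym is never re-checked against the same tokens.
        def isUnprocessed(elemId: String, syn: S, tokens: Seq[T]): Boolean =
            seen.add((elemId, syn, tokens))
    }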
@@ -270,22 +283,29 @@ object NCSynonymsManager extends NCService {
             require(toks != null)
             require(!syn.sparse && !syn.hasIdl)
 
-            if (
-                toks.length == syn.length && {
+            if (toks.length == syn.length) { // Same length.
+                val ok =
                     if (syn.isTextOnly)
-                        toks.zip(syn).forall(p => p._1.stem == p._2.wordStem)
+                        toks.zip(syn).
+                            // Checks every synonym chunk against its corresponding token.
+                            forall { case (tok, chunk) => tok.stem == chunk.wordStem }
                     else
-                        toks.zip(syn).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
-                }
-            )
-                callback(())
+                        toks.zip(syn).
+                            // Pre-sort by chunk kind for performance: chunks that are easier to compare go first.
+                            sortBy { case (_, chunk) => getSort(chunk.kind) }.
+                            // Checks every synonym chunk against its corresponding token.
+                            forall { case (tok, chunk) => isMatch(tok, chunk) }
+
+                if (ok)
+                    callback(())
+            }
         }
 
     /**
       *
       * @param srvReqId
       * @param elemId
-      * @param s
+      * @param syn
       * @param toks
       * @param req
       * @param variantsToks
@@ -294,22 +314,22 @@ object NCSynonymsManager extends NCService {
     def onMatch(
         srvReqId: String,
         elemId: String,
-        s: Synonym,
+        syn: Synonym,
         toks: Seq[IdlToken],
         req: NCRequest,
         variantsToks: Seq[Seq[NCToken]],
         callback: Unit => Unit
     ): Unit =
-        if (isUnprocessedIdl(srvReqId, elemId, s, toks)) {
+        if (isUnprocessedIdl(srvReqId, elemId, syn, toks)) {
             require(toks != null)
 
             if (
-                toks.length == s.length && // Same length.
-                toks.count(_.isToken) >= s.idlChunks && // Enough tokens.
-                toks.zip(s).sortBy { // Pre-sort by chunk kind.
+                toks.length == syn.length && // Same length.
+                toks.count(_.isToken) >= syn.idlChunks && // Enough tokens.
+                toks.zip(syn).sortBy { // Pre-sort by chunk kind for performance: easier-to-compare chunks go first.
                     case (_, chunk) => getSort(chunk.kind)
-                }
-                .forall { // TODO?
+                }.
+                forall { // Checks every synonym chunk against its corresponding token.
                     case (idlTok, chunk) => isMatch(idlTok, chunk, req, variantsToks)
                 }
             )
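
The pre-sort comments above rely on forall short-circuiting: cheap chunk kinds
are compared first, so expensive checks run only if everything cheaper already
matched. A hypothetical weighting (the actual getSort implementation is not part
of this diff):

    sealed trait ChunkKind // Stand-in for the real chunk kind type.
    case object TEXT extends ChunkKind
    case object REGEX extends ChunkKind
    case object IDL extends ChunkKind

    // Cheapest comparisons first so the sorted `forall` fails fast.
    def sortWeight(kind: ChunkKind): Int = kind match {
        case TEXT  => 0 // Plain stem comparison.
        case REGEX => 1 // Regex matching.
        case IDL   => 2 // Full IDL predicate evaluation.
    }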
@@ -365,7 +385,7 @@ object NCSynonymsManager extends NCService {
                 syn,
                 toks,
                 (t: IdlToken, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
-                (t: IdlToken) => if (t.isToken) t.token.getStartCharIndex else t.word.startCharIndex,
+                (t: IdlToken) => t.startCharIndex,
                 shouldBeNeighbors = !syn.sparse
             ) match {
                 case Some(res) => callback(res)
@@ -374,13 +394,15 @@ object NCSynonymsManager extends NCService {
         }
 
     /**
+      * Checks that a suitable variant wasn't deleted and that the IDL condition for the token is still valid.
+      * We have to re-check because the NCIdlContext used in the predicate is based on the variant.
       *
       * @param srvReqId
-      * @param senToks
+      * @param toks
       */
-    def isStillValidIdl(srvReqId: String, senToks: Seq[NCToken]): Boolean =
+    def isStillValidIdl(srvReqId: String, toks: Seq[NCToken]): Boolean =
         savedIdl.get(srvReqId) match {
-            case Some(m) =>
+            case Some(map) =>
                 lazy val allCheckedSenToks = {
                     val set = mutable.HashSet.empty[SavedIdlKey]
 
@@ -390,13 +412,13 @@ object NCSynonymsManager extends NCService {
                         t.getPartTokens.asScala.foreach(add)
                     }
 
-                    senToks.foreach(add)
+                    toks.foreach(add)
 
                     set
                 }
 
-                senToks.forall(tok =>
-                    m.get(SavedIdlKey(tok)) match {
+                toks.forall(tok =>
+                    map.get(SavedIdlKey(tok)) match {
                         case Some(vals) =>
                             vals.exists(
                                 v =>
@@ -417,6 +439,7 @@ object NCSynonymsManager extends NCService {
         }
 
     /**
+      * Called when request processing is finished.
       *
       * @param srvReqId
       */
@@ -427,6 +450,7 @@ object NCSynonymsManager extends NCService {
     }
 
     /**
+      * Called on each request enrichment iteration.
       *
       * @param srvReqId
       */