Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/12/23 22:14:59 UTC

[incubator-nlpcraft] branch NLPCRAFT-469 updated: CR.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-469
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-469 by this push:
     new e91a46d  CR.
e91a46d is described below

commit e91a46d929f7f86681704216697d7cee87e979c1
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Dec 23 14:14:54 2021 -0800

    CR.
---
 bindist/LICENSE                                    |  37 ----
 bindist/NOTICE                                     |   7 -
 .../scala/org/apache/nlpcraft/NCResultType.java    |   4 +-
 .../enricher/NCEnDictionaryTokenEnricher.java      |   3 +-
 .../token/enricher/impl/NCEnDictionaryImpl.scala   |   6 +-
 .../token/parser/opennlp/NCOpenNlpTokenParser.java |  12 +-
 .../opennlp/impl/NCEnStopWordGenerator.scala       |  17 +-
 .../parser/opennlp/impl/NCEnStopWordsFinder.scala  | 200 ++++++++-------------
 .../token/parser/opennlp/impl/NCOpenNlpImpl.scala  |  45 +++--
 .../apache/nlpcraft/internal/util/NCUtils.scala    |  29 +--
 10 files changed, 125 insertions(+), 235 deletions(-)

diff --git a/bindist/LICENSE b/bindist/LICENSE
index 16bebe4..09418e7 100644
--- a/bindist/LICENSE
+++ b/bindist/LICENSE
@@ -203,40 +203,3 @@
 
 Files under 'nlpcraft/src/main/resources/moby' folder are public domain. See
 https://en.wikipedia.org/wiki/Moby_Project for more information.
-
-File org.apache.nlpcraft.internal.util.NCIdGenerator.java is based on https://github.com/peet/hashids.java
-and licensed under MIT license:
-
-    Copyright (C) 2012 Ivan Akimov
-
-    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-    documentation files (the "Software"), to deal in the Software without restriction, including without limitation
-    the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
-    and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-    The above copyright notice and this permission notice shall be included in all copies or substantial
-    portions of the Software.
-
-    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-    TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-    DEALINGS IN THE SOFTWARE.
-
-File org.apache.nlpcraft.internal.blowfish.NCBlowfishHasher.java is based on
-https://github.com/jeremyh/jBCrypt/blob/master/src/main/java/org/mindrot/BCrypt.java
-and licensed as follows:
-
-    Copyright (C) 2006 Damien Miller <dj...@mindrot.org>
-
-    Permission to use, copy, modify, and distribute this software for any
-    purpose with or without fee is hereby granted, provided that the above
-    copyright notice and this permission notice appear in all copies.
-
-    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
\ No newline at end of file
diff --git a/bindist/NOTICE b/bindist/NOTICE
index add790b..6d75bcb 100644
--- a/bindist/NOTICE
+++ b/bindist/NOTICE
@@ -4,10 +4,3 @@ Copyright (C) 2021 The Apache Software Foundation
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
-File org.apache.nlpcraft.internal.util.NCIdGenerator.java.
-Based on https://github.com/peet/hashids.java
-Copyright (C) 2012 Ivan Akimov
-
-File org.apache.nlpcraft.internal.blowfish.NCBlowfishHasher.java.
-Based on https://github.com/jeremyh/jBCrypt/blob/master/src/main/java/org/mindrot/BCrypt.java
-Copyright (C) 2006 Damien Miller <dj...@mindrot.org>
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCResultType.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCResultType.java
index e2579ed..12edb50 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCResultType.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCResultType.java
@@ -22,12 +22,12 @@ package org.apache.nlpcraft;
  */
 public enum NCResultType {
     /**
-     * Final result is produced.
+     * Final result is ready.
      */
     ASK_RESULT,
 
     /**
-     * Curation is requires.
+     * Curation is required.
      */
     ASK_CURATE,
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricher.java
index a2a1ff1..85f0163 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricher.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/NCEnDictionaryTokenEnricher.java
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.internal.nlp.token.enricher.impl.NCEnDictionaryImpl;
 import java.util.List;
 
 /**
- * 
+ * TODO: enriches tokens with the <code>dict:en</code> property.
  */
 public class NCEnDictionaryTokenEnricher implements NCTokenEnricher {
     private final NCEnDictionaryImpl impl = new NCEnDictionaryImpl();
@@ -40,7 +40,6 @@ public class NCEnDictionaryTokenEnricher implements NCTokenEnricher {
     @Override
     public void enrich(NCRequest req, NCModelConfig cfg, List<NCToken> toks) {
         assert impl != null;
-
         impl.enrich(req, cfg, toks);
     }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnDictionaryImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnDictionaryImpl.scala
index 405cff3..4c0b53d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnDictionaryImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/enricher/impl/NCEnDictionaryImpl.scala
@@ -19,8 +19,9 @@ package org.apache.nlpcraft.internal.nlp.token.enricher.impl
 
 import org.apache.nlpcraft.*
 import org.apache.nlpcraft.internal.util.NCUtils
+
 /**
-  *
+  * TODO: enriches tokens with the <code>dict:en</code> property.
   */
 class NCEnDictionaryImpl extends NCLifecycle:
     @volatile private var dict: Set[String] = _
@@ -29,10 +30,11 @@ class NCEnDictionaryImpl extends NCLifecycle:
     override def stop(): Unit = dict = null
 
     /**
+      * TODO: enriches tokens with the <code>dict:en</code> property.
       *
       * @param req
       * @param cfg
       * @param toks
       */
     def enrich(req: NCRequest, cfg: NCModelConfig, toks: java.util.List[NCToken]): Unit =
-        toks.forEach(t => t.put("english", dict.contains(t.getLemma)))
+        toks.forEach(t => t.put("dict:en", dict.contains(t.getLemma)))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
index a20131b..c0e921f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/NCOpenNlpTokenParser.java
@@ -58,9 +58,9 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
      * @throws NCException
      */
     public NCOpenNlpTokenParser(File tokMdl, File posMdl, File lemmaDic) {
-        Objects.requireNonNull(tokMdl, "Tonenizer model cannot be null");
-        Objects.requireNonNull(posMdl, "POS model cannot be null");
-        Objects.requireNonNull(lemmaDic, "Lemmatizer model cannot be null");
+        Objects.requireNonNull(tokMdl, "Tokenizer model file cannot be null.");
+        Objects.requireNonNull(posMdl, "POS model file cannot be null.");
+        Objects.requireNonNull(lemmaDic, "Lemmatizer model file cannot be null.");
 
         try {
             impl = NCOpenNlpImpl.apply(tokMdl, posMdl, lemmaDic);
@@ -78,9 +78,9 @@ public class NCOpenNlpTokenParser implements NCTokenParser {
      * @throws NCException
      */
     public NCOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) {
-        Objects.requireNonNull(tokMdlSrc, "Tonenizer model cannot be null");
-        Objects.requireNonNull(posMdlSrc, "POS model cannot be null");
-        Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model cannot be null");
+        Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be null.");
+        Objects.requireNonNull(posMdlSrc, "POS model path cannot be null.");
+        Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be null.");
 
         try {
             impl = NCOpenNlpImpl.apply(tokMdlSrc, posMdlSrc, lemmaDicSrc);
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordGenerator.scala
index c470d51..0751715 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordGenerator.scala
@@ -25,7 +25,7 @@ import scala.collection.mutable
 /**
  * Generates the stop-word data files: first-word sequences, noun words and possessive words.
   */
-object NCEnStopWordGenerator extends App:
+object NCEnStopWordGenerator:
     private final lazy val stemmer = new PorterStemmer
 
     // Output files.
@@ -193,7 +193,6 @@ object NCEnStopWordGenerator extends App:
 
     private def mkGzip(path: String, lines: Iterable[Any]): Unit =
         val p = NCUtils.mkPath(s"nlpcraft/src/main/resources/stopwords/$path")
-
         NCUtils.mkTextFile(p, lines)
         NCUtils.gzipPath(p)
 
@@ -344,9 +343,13 @@ object NCEnStopWordGenerator extends App:
 
         mkGzip(FIRST_WORDS_FILE, stem(buf.toSeq))
 
-    mkFirstWords()
-    mkNounWords()
-
-    mkGzip(POS_WORDS_FILE, stem(mkPossessiveStopWords))
+    /**
+      * Regenerates and gzips all stop-word data files.
+      *
+      * @param args Command line arguments (unused).
+      */
+    def main(args: Array[String]): Unit =
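+        // Regenerate and gzip all stop-word data files.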
+        mkFirstWords()
+        mkNounWords()
+        mkGzip(POS_WORDS_FILE, stem(mkPossessiveStopWords))
 
-    sys.exit()
+        sys.exit()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
index a90cce7..167dd89 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCEnStopWordsFinder.scala
@@ -19,9 +19,10 @@ package org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl
 
 import com.typesafe.scalalogging.LazyLogging
 import opennlp.tools.stemmer.PorterStemmer
+
 import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCEnStopWordsFinder.*
 import org.apache.nlpcraft.internal.util.NCUtils
-import org.apache.nlpcraft.{NCException, NCParameterizedAdapter, NCToken}
+import org.apache.nlpcraft.*
 
 import java.util
 import java.util.{List as JList, Set as JSet}
@@ -85,7 +86,6 @@ private[impl] object NCEnStopWordsFinder:
     )
 
     private val STOP_BEFORE_STOP: Seq[Word] = Seq("DT", "PRP", "PRP$", "WDT", "WP", "WP$", "WRB")
-
     private val Q_POS = Set("``", "''")
 
     /**
@@ -132,7 +132,7 @@ private[impl] object NCEnStopWordsFinder:
             set.exists { case (b, e) => (b.isEmpty || s.startsWith(b)) && (e.isEmpty || s.endsWith(e)) }
 
         def matches(s: String, posOpt: Option[String]): Boolean =
-            if (s.contains(' '))
+            if s.contains(' ') then
                 false
             else
                 posOpt match
@@ -163,32 +163,33 @@ private[impl] object NCEnStopWordsFinder:
         wildcardsOrigins: ScanHolder
     ):
         def matches(toks: Seq[NCToken]): Boolean =
-            val posOpt =
-                toks.size match
-                    case 0 => throw new AssertionError(s"Unexpected empty tokens.")
-                    case 1 => Some(toks.head.getPos)
-                    case _ => None
+            val posOpt = toks.size match
+                case 0 => throw new AssertionError("Unexpected empty tokens.")
+                case 1 => Some(toks.head.getPos)
+                case _ => None
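+            // Note: POS filtering applies only when the span is a single token (posOpt is defined).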
 
             // Hash access.
             stems.matches(toStemKey(toks), posOpt) ||
-                lemmas.matches(toLemmaKey(toks), posOpt) ||
-                origins.matches(toOriginalKey(toks), posOpt) ||
-                // Scan access.
-                wildcardsLemmas.matches(toLemmaKey(toks), posOpt) ||
-                wildcardsOrigins.matches(toOriginalKey(toks), posOpt)
-
-    private  def isQuote(t: NCToken): Boolean = Q_POS.contains(t.getPos)
+            lemmas.matches(toLemmaKey(toks), posOpt) ||
+            origins.matches(toOriginalKey(toks), posOpt) ||
+            // Scan access.
+            wildcardsLemmas.matches(toLemmaKey(toks), posOpt) ||
+            wildcardsOrigins.matches(toOriginalKey(toks), posOpt)
 
+    private def isQuote(t: NCToken): Boolean = Q_POS.contains(t.getPos)
     private def toStemKey(toks: Seq[NCToken]): String = toks.map(_.getStem).mkString(" ")
     private def toLemmaKey(toks: Seq[NCToken]): String = toks.map(_.getLemma).mkString(" ")
     private def toValueKey(toks: Seq[NCToken]): String = toks.map(_.getOriginalText.toLowerCase).mkString(" ")
     private def toOriginalKey(toks: Seq[NCToken]): String = toks.map(_.getOriginalText).mkString(" ")
 
-import org.apache.nlpcraft.internal.nlp.token.parser.opennlp.impl.NCEnStopWordsFinder.*
-
-private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStopWordsStems: Set[String]) extends LazyLogging:
-    require(addStopWordsStems != null)
-    require(exclStopWordsStems != null)
+/**
+  * Stop words finder for English.
+  *
+  * @param addStems Stems of additional, user-defined stop words.
+  * @param exclStems Stems of words to exclude from stop-word detection.
+  */
+private[impl] class NCEnStopWordsFinder(addStems: Set[String], exclStems: Set[String]) extends LazyLogging:
+    require(addStems != null)
+    require(exclStems != null)
 
     private val stemmer = new PorterStemmer
 
@@ -203,13 +204,13 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         "percent"
     ).map(stemmer.stem)
 
-    @volatile private var possessiveWords: Set[String] = _
+    @volatile private var posWords: Set[String] = _ // Possessive words.
     @volatile private var firstWords: Set[String] = _
     @volatile private var nounWords: Set[String] = _
 
     // Stemming is already done by the generator.
     NCUtils.executeParallel(
-        () => possessiveWords = read("stopwords/possessive_words.txt.gz"),
+        () => posWords = read("stopwords/possessive_words.txt.gz"),
         () => firstWords = read("stopwords/first_words.txt.gz"),
         () => nounWords = read("stopwords/noun_words.txt.gz")
     )(ExecutionContext.Implicits.global)
@@ -233,10 +234,8 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
       */
     private def readStopWords(lines: Seq[String]): Map[Boolean, StopWordHolder] =
         // 1. Prepares accumulation data structure.
-        object WordForm extends Enumeration:
-            type WordForm = Value
-
-            val STEM, LEM, ORIG = Value
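+        // Word forms used as lookup keys: stemmed, lemmatized or original (case-sensitive) text.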
+        enum WordForm:
+            case STEM, LEM, ORIG
 
         import WordForm.*
 
@@ -246,7 +245,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             val excludes = mutable.HashMap.empty[String, mutable.HashSet[T]]
 
             def addCondition(cond: T, poses: Map[String, Boolean]): Any =
-                if (poses.isEmpty)
+                if poses.isEmpty then
                     any += cond
                 else
                     def add(m: mutable.HashMap[String, mutable.HashSet[T]], incl: Boolean): Unit =
@@ -255,10 +254,8 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
                                 case Some(set) => set.add(cond)
                                 case _ =>
                                     val set = mutable.HashSet.empty[T]
-
-                                        set += cond
-
-                                        m += pos -> set
+                                    set += cond
+                                    m += pos -> set
                         )
 
                     add(includes, incl = true)
@@ -267,17 +264,13 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         type Key = (Boolean, WordForm)
         def mkMap[T](mkT: Unit => T): Map[Key, T] =
             val m = mutable.Map.empty[Key, T]
-
             def add(f: WordForm, mkT: Unit => T, isExc: Boolean): Unit =
                 val tuple: (Key, T) = (isExc, f) -> mkT(())
-
                 m += tuple._1 -> tuple._2
-
             WordForm.values.foreach(f =>
                 add(f, mkT, isExc = true)
-                    add(f, mkT, isExc = false)
+                add(f, mkT, isExc = false)
             )
-
             m.toMap
 
         // Prepares collections.
@@ -292,10 +285,9 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             var s = line.trim
 
             // A single-character word must be a letter.
-            if (s.length == 1 && !s.head.isLetter)
-                throwError("Invalid stop word")
+            if s.length == 1 && !s.head.isLetter then throwError("Invalid stop word")
 
-            def checkSingle(ch: Char): Unit = if (s.count(_ == ch) > 1) throwError(s"Unexpected symbols count: $ch")
+            def checkSingle(ch: Char): Unit = if s.count(_ == ch) > 1 then throwError(s"Unexpected symbol count: $ch")
 
             // Confusing special symbols.
             checkSingle('@')
@@ -303,71 +295,47 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             checkSingle('*')
 
             val isExc = line.head == '~'
-
-            if (isExc)
-                s = line.drop(1)
-
+            if isExc then s = line.drop(1)
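+            // An optional "|" separates the word from its POS tag list; a '~' before a tag excludes that POS.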
             val idxPos = s.indexOf("|")
-
             val poses: Map[String, Boolean] =
-                if (idxPos > 0)
+                if idxPos > 0 then
                     s.
                         drop(idxPos + 1).
                         trim.split(" ").
                         map(_.trim.toUpperCase).
                         filter(_.nonEmpty).
                         toSeq.
-                        map(p => if (p.head == '~') p.drop(1).strip -> false else p -> true).
+                        map(p => if p.head == '~' then p.drop(1).strip -> false else p -> true).
                         toMap
                 else
                     Map.empty
 
-            if (!poses.keys.forall(POSES.contains))
-                throwError(s"Invalid POSes: ${poses.keys.mkString(", ")}")
-
+            if !poses.keys.forall(POSES.contains) then throwError(s"Invalid POSes: ${poses.keys.mkString(", ")}")
             val hasPoses = poses.nonEmpty
-
-            if (hasPoses)
-                s = s.take(idxPos).trim
-
+            if hasPoses then s = s.take(idxPos).trim
             val isMultiWord = s.contains(' ')
 
             // Confusing POSes.
-            if (poses.nonEmpty && isMultiWord)
-                throwError("POSes cannot be defined for multiple stop words.")
-
+            if poses.nonEmpty && isMultiWord then throwError("POSes cannot be defined for multi-word stop words.")
             var isCase = false
-
-            if (s.head == '@')
+            if s.head == '@' then
                 s = s.drop(1)
-
                 // Empty word.
-                if (s.isEmpty)
-                    throwError("Empty word")
-
+                if s.isEmpty then throwError("Empty word.")
                 isCase = true
-
             val idxWild = s.indexOf("*")
-
-            if (idxWild >= 0 && isMultiWord)
-                throwError("Wildcard cannot be defined for multiple stop words.")
-
-            if (idxWild < 0)
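+            // A '*' wildcard splits the entry into a prefix and a suffix matched by linear scan.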
+            if idxWild >= 0 && isMultiWord then throwError("Wildcard cannot be defined for multi-word stop words.")
+            if idxWild < 0 then
                 val (word, form) =
-                    if (isCase)
-                        (s, ORIG)
-                    else {
-                        if (!hasPoses) (stemmer.stem(s), STEM) else (stemmer.stem(s), LEM)
-                    }
-
+                    if isCase then (s, ORIG)
+                    else
+                        if !hasPoses then (stemmer.stem(s), STEM) else (stemmer.stem(s), LEM)
                 mHash((isExc, form)).addCondition(word, poses)
             else
                 val b = s.take(idxWild)
                 val e = s.drop(idxWild + 1)
 
-                if (b.isEmpty && e.isEmpty && !hasPoses)
-                    throwError("Too general wildcard definition.")
-
+                if b.isEmpty && e.isEmpty && !hasPoses then throwError("Too general wildcard definition.")
+                mScan((isExc, if isCase then ORIG else LEM)).addCondition((b, e), poses)
 
         // 3. Converts data to service format.
@@ -383,12 +351,12 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
                 val incl = toImmutable(m((isExc, form)).includes)
                 val excl = toImmutable(m((isExc, form)).excludes)
 
-                    mkInstance(any ++ excl.values.flatten, incl, excl)
-
+                mkInstance(any ++ excl.values.flatten, incl, excl)
+            end mkHolder
             def mkHash(form: WordForm): HashHolder = mkHolder(mHash, form, HashHolder.apply)
             def mkScan(form: WordForm): ScanHolder = mkHolder(mScan, form, ScanHolder.apply)
 
-                isExc -> StopWordHolder(mkHash(STEM), mkHash(LEM), mkHash(ORIG), mkScan(LEM), mkScan(ORIG))
+            isExc -> StopWordHolder(mkHash(STEM), mkHash(LEM), mkHash(ORIG), mkScan(LEM), mkScan(ORIG))
         ).toMap
 
     private def isVerb(pos: String): Boolean = pos.head == 'V'
@@ -412,18 +380,12 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
     ): Boolean =
         var stop = true
 
-        for (
-            (tok, idx) <- ns.zipWithIndex
-                if idx != lastIdx &&
-                    !tok.isStopWord &&
-                    !isException(Seq(tok)) &&
-                    stopPoses.contains(tok.getPos) &&
-                    ns(idx + 1).isStopWord)
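+        // Mark a non-stop token as a stop word when it has an eligible POS and immediately precedes a stop word.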
+        for ((tok, idx) <- ns.zipWithIndex if idx != lastIdx && !tok.isStopWord && !isException(Seq(tok)) &&
+            stopPoses.contains(tok.getPos) && ns(idx + 1).isStopWord)
             stops += tok
-
             stop = false
 
-        if (stop) true else markBefore(ns, stopPoses, lastIdx, isException, stops)
+        if stop then true else markBefore(ns, stopPoses, lastIdx, isException, stops)
 
     /**
       * Returns the cached value for the given tokens, computing and caching it on first access.
@@ -437,9 +399,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             case Some(b) => b
             case None =>
                 val b = get(toks)
-
                 cache += toks -> b
-
                 b
 
     /**
@@ -454,20 +414,12 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             val max = ns.size - 1
             var stop = true
 
-            for (
-                (tok, idx) <- ns.zipWithIndex
-                    if idx != max &&
-                        !tok.isStopWord &&
-                        !exclStopWordsStems.contains(tok.getStem) &&
-                        POSES.contains(tok.getPos) &&
-                        ns(idx + 1).isStopWord
-            )
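+            // Mark non-stop tokens that precede stop words; repeat until a fixed point is reached.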
+            for ((tok, idx) <- ns.zipWithIndex if idx != max && !tok.isStopWord && !exclStems.contains(tok.getStem) &&
+                POSES.contains(tok.getPos) && ns(idx + 1).isStopWord)
                 stops += tok
-
                 stop = false
 
-            if (!stop)
-                processCommonStops0(ns)
+            if !stop then processCommonStops0(ns)
 
         processCommonStops0(ns)
 
@@ -506,8 +458,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
             // | Pass #1.                        |
             // | POS tags and manual resolution. |
             // +---------------------------------+
-            val stop =
-            !isException(Seq(tok)) &&
+            val stop = !isException(Seq(tok)) &&
                 (// Percents after numbers.
                     // 1. Word from 'percentage' list.
                     percents.contains(stem) &&
@@ -515,13 +466,12 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
                         !isFirst && prev().getPos == "CD" &&
                         // 3. It's last word or any words after except numbers.
                         (isLast || next().getPos != "CD")
-                    ) ||
+                ) ||
                 // be, was, is etc. or has been etc.
                 isCommonVerbs("have", "be") ||
                 // be, was, is etc. or have done etc.
                 isCommonVerbs("have", "do")
-            if (stop)
-                stops += tok
+            if stop then stops += tok
 
         // +--------------------------------------+
         // | Pass #2.                             |
@@ -542,7 +492,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         // | Pass #3.                                   |
         // | Check external possessive stop-word file.  |
         // +--------------------------------------------+
-        for (tup <- origToks; key = tup._2 if possessiveWords.contains(key) && !isException(tup._1))
+        for (tup <- origToks; key = tup._2 if posWords.contains(key) && !isException(tup._1))
             tup._1.foreach(tok => stops += tok)
 
         // +--------------------------------------------------+
@@ -554,8 +504,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
 
         // All sentence first stop words + first non stop word.
         val startToks = toks.takeWhile(_.isStopWord) ++ toks.find(!_.isStopWord).map(p => p)
-        for (startTok <- startToks; tup <- origToks.filter(_._1.head == startTok); key = tup._2
-            if firstWords.contains(key) && !isException(tup._1))
+        for (startTok <- startToks; tup <- origToks.filter(_._1.head == startTok); key = tup._2 if firstWords.contains(key) && !isException(tup._1))
             tup._1.foreach(tok => stops += tok)
             foundKeys += key
 
@@ -565,9 +514,7 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         // +-------------------------------------------------+
         for (tup <- origToks; key = tup._2 if !foundKeys.contains(key) && !isException(tup._1))
             foundKeys.find(key.startsWith) match
-                case Some(s) =>
-                    if (nounWords.contains(key.substring(s.length).strip))
-                        tup._1.foreach(tok => stops += tok)
+                case Some(s) => if nounWords.contains(key.substring(s.length).strip) then tup._1.foreach(tok => stops += tok)
                 case None => ()
 
         // +-------------------------------------------------+
@@ -580,10 +527,10 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         // | Pass #7.                                        |
         // | Processing additional and excluded stop words.  |
         // +-------------------------------------------------+
-        for (t <- toks if addStopWordsStems.contains(t.getStem))
+        for (t <- toks if addStems.contains(t.getStem))
             stops += t
 
-        for (t <- stops.filter(t => exclStopWordsStems.contains(t.getStem)))
+        for (t <- stops.filter(t => exclStems.contains(t.getStem)))
             stops -= t
 
         // +-------------------------------------------------+
@@ -600,23 +547,18 @@ private[impl] class NCEnStopWordsFinder(addStopWordsStems: Set[String], exclStop
         // +-------------------------------------------------+
         var quotes = toks.filter(isQuote)
 
-        if (quotes.size % 2 != 0)
-            // Just ignore last odd quote.
-            quotes = quotes.reverse.drop(1).reverse
+        // Just ignore the last unpaired quote.
+        if quotes.size % 2 != 0 then quotes = quotes.reverse.drop(1).reverse
 
-        if (quotes.nonEmpty)
+        if quotes.nonEmpty then
             val m = toks.zipWithIndex.toMap
-
-            val pairs =
-                quotes.zipWithIndex.
-                    drop(1).
-                    flatMap { case (t, idx) => if (idx % 2 != 0) Some(m(t) -> m(quotes(idx - 1))) else None }
-
-            stops --=
-                stops.filter(t => pairs.exists { case (from, to) =>
+            val pairs = quotes.zipWithIndex.drop(1).flatMap {
+                case (t, idx) => if idx % 2 != 0 then Some(m(t) -> m(quotes(idx - 1))) else None
+            }
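+            // Un-mark stop words that fall between a pair of quotes.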
+            stops --= stops.filter(t => pairs.exists {
+                case (from, to) =>
                     val idx = m(t)
-
                     from > idx && to < idx
-                })
+            })
 
         stops.toSeq.sortBy(_.getStartCharIndex)
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
index dfc3c69..867c393 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/nlp/token/parser/opennlp/impl/NCOpenNlpImpl.scala
@@ -70,7 +70,7 @@ class NCOpenNlpImpl(
     @volatile var tokenizer: TokenizerME = _
     @volatile var tagger: POSTaggerME = _
     @volatile var lemmatizer: DictionaryLemmatizer = _
-    @volatile var sw: NCEnStopWordsFinder = _
+    @volatile var swFinder: NCEnStopWordsFinder = _
 
     private var addStopWords: JSet[String] = _
     private var exclStopWords: JSet[String] = _
@@ -80,10 +80,14 @@ class NCOpenNlpImpl(
             () => tokenizer = new TokenizerME(new TokenizerModel(tokMdlIn)),
             () => tagger = new POSTaggerME(new POSModel(posMdlIn)),
             () => lemmatizer = new DictionaryLemmatizer(lemmaDicIn),
-            () => sw = new NCEnStopWordsFinder(stem(addStopWords), stem(exclStopWords))
+            () => swFinder = new NCEnStopWordsFinder(stem(addStopWords), stem(exclStopWords))
         )(ExecutionContext.Implicits.global)
 
-    override def stop(): Unit = sw = null; lemmatizer = null; tagger = null; lemmatizer = null
+    override def stop(): Unit =
+        swFinder = null
+        lemmatizer = null
+        tagger = null
+        tokenizer = null
 
     /**
       *
@@ -114,7 +118,7 @@ class NCOpenNlpImpl(
       * @param set
       */
     private def stem(set: JSet[String]): Set[String] =
-        if (set == null) Set.empty else set.asScala.toSet.map(stemmer.stem)
+        if set == null then Set.empty else set.asScala.toSet.map(stemmer.stem)
 
     /**
       *
@@ -122,10 +126,6 @@ class NCOpenNlpImpl(
       * @return
       */
     override def parse(req: NCRequest): JList[NCToken] =
-        // TODO: check started?
-        if (tokenizer == null)
-            throw new IllegalStateException(s"${this.getClass.getName} is not started.")
-
         // OpenNLP classes are not thread-safe.
         this.synchronized {
             val sen = req.getNormalizedText
@@ -161,24 +161,23 @@ class NCOpenNlpImpl(
                     case (lemma, idx) => fixes.getOrElse(idx, lemma)
                 }
 
-            val res: Seq[NCToken] =
-                holders.zip(posTags).zip(lemmas).toIndexedSeq.map { case ((h, pos), lemma) =>
-                    new NCParameterizedAdapter with NCToken:
-                        override def getOriginalText: String = h.origin
-                        override def getNormalizedText: String = h.normalized
-                        override def getLemma: String = lemma
-                        override def getStem: String = stemmer.stem(h.normalized)
-                        override def getPos: String = pos
-                        override def isStopWord: Boolean = false
-                        override def getStartCharIndex: Int = h.start
-                        override def getEndCharIndex: Int = h.end
-                        override def getLength: Int = h.length
-                }
+            val res: Seq[NCToken] = holders.zip(posTags).zip(lemmas).toIndexedSeq.map { case ((h, pos), lemma) =>
+                new NCParameterizedAdapter with NCToken:
+                    override def getOriginalText: String = h.origin
+                    override def getNormalizedText: String = h.normalized
+                    override def getLemma: String = lemma
+                    override def getStem: String = stemmer.stem(h.normalized)
+                    override def getPos: String = pos
+                    override def isStopWord: Boolean = false
+                    override def getStartCharIndex: Int = h.start
+                    override def getEndCharIndex: Int = h.end
+                    override def getLength: Int = h.length
+            }
 
-            val stops = sw.find(res)
+            val stops = swFinder.find(res)
 
             res.map(tok =>
-                if (stops.contains(tok))
+                if stops.contains(tok) then
                     new NCParameterizedAdapter with NCToken:
                         override def getOriginalText: String = tok.getOriginalText
                         override def getNormalizedText: String = tok.getNormalizedText
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
index aa05e79..9b4c7d9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCUtils.scala
@@ -313,7 +313,7 @@ object NCUtils extends LazyLogging:
       * @tparam T
       * @return
       */
-    def notNull[T <: AnyRef](v: T, dflt: T): T = if (v == null) dflt else v
+    def notNull[T <: AnyRef](v: T, dflt: T): T = if v == null then dflt else v
 
     /**
       * Strips ANSI escape sequences from the given string.
@@ -486,7 +486,7 @@ object NCUtils extends LazyLogging:
                     s"$exClsName$errMsg $ansiCyanFg->$ansiReset ($fileName:$lineNum)"
 
             msg.split("\n").foreach(line => {
-                val s = s"${" " * indent}${if (first) ansiBlue("+-+ ") else "   "}${bo(y(line))}"
+                val s = s"${" " * indent}${if first then ansiBlue("+-+ ") else "   "}${bo(y(line))}"
                 logger.log(s)
                 first = false
             })
@@ -742,11 +742,7 @@ object NCUtils extends LazyLogging:
                 import java.util.*
 
                 // Could be long for large sequences...
-                val seq =
-                    if (sort)
-                        lines.map(_.toString).toSeq.sorted
-                    else
-                        lines
+                val seq = if sort then lines.map(_.toString).toSeq.sorted else lines
 
                 ps.println(s"#")
                 ps.println(s"# Licensed to the Apache Software Foundation (ASF) under one or more")
@@ -847,14 +843,12 @@ object NCUtils extends LazyLogging:
         try
             Using.resource(new GZIPOutputStream(new FileOutputStream(gz))) { stream =>
                 stream.write(readFileBytes(f))
-
                 stream.flush()
             }
         catch
             case e: IOException => throw new NCException(s"Error gzipping file: $f", e)
 
-        if (!f.delete())
-            throw new NCException(s"Error while deleting file: $f")
+        if !f.delete() then throw new NCException(s"Error while deleting file: $f")
 
         logger.trace(s"File gzipped [source=$f, destination=$gz]")
 
@@ -983,18 +977,13 @@ object NCUtils extends LazyLogging:
           */
         def permutations(toks: Seq[NCToken]): Seq[Seq[NCToken]] = 
             def multiple(seq: Seq[Seq[Option[NCToken]]], t: NCToken): Seq[Seq[Option[NCToken]]] =
-                if (seq.isEmpty)
-                    if (t.isStopWord) IndexedSeq(IndexedSeq(Some(t)), IndexedSeq(None)) else IndexedSeq(IndexedSeq(Some(t)))
-                else {
-                    (for (subSeq <- seq) yield subSeq :+ Some(t)) ++
-                        (if (t.isStopWord) for (subSeq <- seq) yield subSeq :+ None else Seq.empty)
-                }
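+                // Each stop word doubles the branch count: one branch keeps the token, one drops it (None).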
+                if seq.isEmpty then
+                    if t.isStopWord then IndexedSeq(IndexedSeq(Some(t)), IndexedSeq(None)) else IndexedSeq(IndexedSeq(Some(t)))
+                else
+                    (for (subSeq <- seq) yield subSeq :+ Some(t)) ++ (if t.isStopWord then for (subSeq <- seq) yield subSeq :+ None else Seq.empty)
 
             var res: Seq[Seq[Option[NCToken]]] = Seq.empty
-
-            for (t <- toks)
-                res = multiple(res, t)
-
+            for (t <- toks) res = multiple(res, t)
             res.map(_.flatten).filter(_.nonEmpty)
 
         tokenMix(tokens, stopWords = true, maxLen).