Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/06/23 10:36:07 UTC

[incubator-nlpcraft] branch NLPCRAFT-85 created (now 322e467)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-85
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 322e467  WIP.

This branch includes the following new commits:

     new 57d490f  Spell and Geo enricher refactored.
     new cc87529  WIP.
     new 322e467  WIP.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-nlpcraft] 01/03: Spell and Geo enricher refactored.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-85
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 57d490f821a79e7cf228f96cf8efef1937ae7dce
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Jun 23 13:24:32 2020 +0300

    Spell and Geo enricher refactored.
---
 .../apache/nlpcraft/server/geo/NCGeoManager.scala  |  16 +-
 .../org/apache/nlpcraft/server/json/NCJson.scala   |  11 +
 .../server/nlp/enrichers/geo/NCGeoEnricher.scala   | 336 +++++++++++----------
 .../server/nlp/spell/NCSpellCheckManager.scala     |  43 ++-
 4 files changed, 230 insertions(+), 176 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
index c4c3fa4..182cdc2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/NCGeoManager.scala
@@ -19,6 +19,7 @@ package org.apache.nlpcraft.server.geo
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common.nlp.dict.{NCDictionaryManager, NCDictionaryType}
+import org.apache.nlpcraft.common.util.NCUtils
 import org.apache.nlpcraft.common.{NCService, _}
 import org.apache.nlpcraft.server.json.NCJson
 
@@ -82,7 +83,20 @@ object NCGeoManager extends NCService {
       */
     @throws[NCE]
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
-        model = readAndConstructModel(true)
+        val ok =
+            Seq(
+                COUNTRY_DIR ,
+                CONT_PATH,
+                METRO_PATH,
+                SYNONYMS_DIR_PATH,
+                CASE_SENSITIVE_DIR_PATH
+            ).forall(NCUtils.hasResource)
+
+        if (ok)
+            model = readAndConstructModel(true)
+        else
+            // TODO: warning text.
+            logger.warn(s"Some GEO Data not found for some reasons")
         
         super.start()
     }
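
The hunk above makes GEO model construction conditional on all of its data resources being present on the classpath. Roughly, the same guarded-startup shape looks like this as a standalone sketch (hasResource is a stand-in for NCUtils.hasResource; the resource names and model type are illustrative, not the NCGeoManager API):

    object GeoStartupSketch {
        // Stand-in for NCUtils.hasResource: true if the path exists on the classpath.
        private def hasResource(path: String): Boolean =
            getClass.getClassLoader.getResource(path) != null

        // Illustrative list; the real manager checks country, continent, metro,
        // synonym and case-sensitive data locations.
        private val required = Seq("geo/countries", "geo/continents.json", "geo/metro.json")

        // None means the optional GEO data was not packaged and enrichment stays disabled.
        @volatile private var model: Option[String] = None

        def start(): Unit =
            if (required.forall(hasResource))
                model = Some("geo model") // Read and construct the real model here.
            else
                println("GEO data not found on the classpath - geo enrichment is disabled.")
    }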
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/json/NCJson.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/json/NCJson.scala
index 99c5fe6..08e3981 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/json/NCJson.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/json/NCJson.scala
@@ -300,6 +300,17 @@ object NCJson {
             case e: Throwable ⇒ throw new NCE(s"Failed to parse: $res", e)
         }
 
+    /**
+      * Extracts optional object of type `T` from the given JSON resource, returning `None` if the resource is missing.
+      *
+      * @param res Resource to extract from.
+      * @param ignoreCase Whether or not to ignore case.
+      * @tparam T Type of the object to extract.
+      */
+    @throws[NCE]
+    def extractResourceOpt[T: Manifest](res: String, ignoreCase: Boolean): Option[T] =
+        if (U.hasResource(res)) Some(extractResource(res, ignoreCase)) else None
+
     // Gets string with removed symbol + from exponent part of numbers.
     // It is developed to avoid Lift parsing errors during processing numbers like '2E+2'.
     @tailrec
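
The new extractResourceOpt method returns None instead of failing when an optional classpath resource is absent. A minimal, dependency-free sketch of the same idea (readResourceOpt and the resource path are illustrative; the real method additionally runs the content through the JSON extractor for type T):

    import scala.io.Source

    object ResourceOptSketch {
        // None when the classpath resource is missing, Some(content) otherwise.
        def readResourceOpt(res: String): Option[String] =
            Option(getClass.getClassLoader.getResourceAsStream(res)).map { in =>
                try Source.fromInputStream(in, "UTF-8").mkString finally in.close()
            }

        def main(args: Array[String]): Unit =
            readResourceOpt("spell/dictionary.json") match {
                case Some(json) => println(s"Loaded ${json.length} characters.")
                case None       => println("Optional resource is not packaged - skipping.")
            }
    }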
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
index dc93989..a9a9434 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
@@ -52,35 +52,18 @@ object NCGeoEnricher extends NCServerEnricher {
     // Common word exceptions configuration folder.
     private final val EXCEPTIONS_PATH = "geo/exceptions"
 
-    @throws[NCE]
-    private[geo] final val LOCATIONS: Map[String, Set[NCGeoEntry]] = NCGeoManager.getModel.synonyms
-
-    // GEO names matched with common english words and user defined exception GEO names.
-    // Note that 'ignore case' parameter set as false because DLGeoLocationKind definition (CITY ect)
-    @throws[NCE]
-    // TODO: refactor... incomprehensible!
-    private final val COMMONS: Map[NCGeoLocationKind, Set[String]]  =
-        U.getFilesResources(EXCEPTIONS_PATH).
-            flatMap(f ⇒
-                NCJson.extractResource[immutable.Map[String, immutable.Set[String]]](f, ignoreCase = false).
-                    map(p ⇒ NCGeoLocationKind.withName(p._1.toUpperCase) → p._2)
-            ).groupBy(_._1).map(p ⇒ p._1 → p._2.flatMap(_._2).toSet).map(p ⇒ p._1 → p._2.map(_.toLowerCase))
-
     private final val GEO_TYPES: Set[String] = NCGeoLocationKind.values.map(mkName)
 
+    @volatile private[geo] var locations: Map[String, Set[NCGeoEntry]] = _
+    @volatile private var commons: Map[NCGeoLocationKind, Set[String]] = _
+    @volatile private var topUsa: Set[String] = _
+    @volatile private var topWorld: Set[String] = _
+
     // JSON extractor for largest cities.
     case class TopCity(name: String, region: String)
 
     private def glue(s: String*): String = s.map(_.toLowerCase).mkString("|")
 
-    private final val TOP_USA: Set[String] =
-        NCJson.extractResource[List[TopCity]](US_TOP_PATH, ignoreCase = true).
-            map(city ⇒ glue(city.name, city.region)).toSet
-
-    private final val TOP_WORLD: Set[String] =
-        NCJson.extractResource[List[TopCity]](WORLD_TOP_PATH, ignoreCase = true).
-            map(city ⇒ glue(city.name, city.region)).toSet
-
     private def isConflictName(name: String): Boolean =
         US_CONFLICT_STATES.contains(name.toLowerCase) && name.exists(_.isLower)
 
@@ -92,167 +75,202 @@ object NCGeoEnricher extends NCServerEnricher {
     private def getGeoNotes(t: NCNlpSentenceToken): Set[NCNlpSentenceNote] = GEO_TYPES.flatMap(t.getNotes)
 
     override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
+        locations = null
+        commons = null
+        topUsa = null
+        topWorld = null
+
         super.stop()
     }
 
+    @throws[NCE]
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
+         NCGeoManager.getModel match {
+            case null ⇒
+                // TODO: warning text.
+                logger.warn(s"Some GEO Data not found for some reasons")
+
+            case mdl ⇒
+                locations = mdl.synonyms
+
+                // TODO: refactor... incomprehensible!
+                // GEO names matched with common english words and user defined exception GEO names.
+                // Note that 'ignore case' parameter set as false because DLGeoLocationKind definition (CITY ect)
+                commons =
+                    U.getFilesResources(EXCEPTIONS_PATH).
+                        flatMap(f ⇒
+                            NCJson.extractResource[immutable.Map[String, immutable.Set[String]]](f, ignoreCase = false).
+                                map(p ⇒ NCGeoLocationKind.withName(p._1.toUpperCase) → p._2)
+                        ).groupBy(_._1).map(p ⇒ p._1 → p._2.flatMap(_._2).toSet).map(p ⇒ p._1 → p._2.map(_.toLowerCase))
+
+                topUsa =
+                    NCJson.extractResource[List[TopCity]](US_TOP_PATH, ignoreCase = true).
+                        map(city ⇒ glue(city.name, city.region)).toSet
+
+                topWorld =
+                    NCJson.extractResource[List[TopCity]](WORLD_TOP_PATH, ignoreCase = true).
+                        map(city ⇒ glue(city.name, city.region)).toSet
+         }
+
         super.start()
     }
 
     @throws[NCE]
     override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
-            // This stage must not be 1st enrichment stage.
-            assume(ns.nonEmpty)
+            if (locations != null) {
+                // This stage must not be 1st enrichment stage.
+                assume(ns.nonEmpty)
 
-            for (toks ← ns.tokenMixWithStopWords(withQuoted = true)) {
-                def mkNote(kind: NCGeoLocationKind, seq: (String, Any)*): NCNlpSentenceNote =
-                    NCNlpSentenceNote(toks.map(_.index), mkName(kind), seq :_*)
+                for (toks ← ns.tokenMixWithStopWords(withQuoted = true)) {
+                    def mkNote(kind: NCGeoLocationKind, seq: (String, Any)*): NCNlpSentenceNote =
+                        NCNlpSentenceNote(toks.map(_.index), mkName(kind), seq: _*)
 
-                def toSerializable(m: Map[String, Any]): java.io.Serializable= {
-                    val ser = new util.HashMap[String, Object]()
+                    def toSerializable(m: Map[String, Any]): java.io.Serializable = {
+                        val ser = new util.HashMap[String, Object]()
 
-                    m.foreach { case (k, v) ⇒ ser.put(k, v.asInstanceOf[Object]) }
+                        m.foreach { case (k, v) ⇒ ser.put(k, v.asInstanceOf[Object]) }
 
-                    ser
-                }
-
-                def make(e: NCGeoEntry): NCNlpSentenceNote =
-                    e match {
-                        case x: NCGeoMetro ⇒
-                            mkNote(
-                                METRO,
-                                "metro" → x.name
-                            )
-
-                        case x: NCGeoContinent ⇒
-                            mkNote(
-                                CONTINENT,
-                                "continent" → x.name
-                            )
-
-                        case x: NCGeoSubContinent ⇒
-                            mkNote(
-                                SUBCONTINENT,
-                                "continent" → x.continent.name,
-                                "subcontinent" → x.name
-                            )
-
-                        case x: NCGeoCountry ⇒
-                            mkNote(
-                                COUNTRY,
-                                "continent" → x.subContinent.continent.name,
-                                "subcontinent" → x.subContinent.name,
-                                "country" → x.name,
-                                "countrymeta" → toSerializable(x.meta)
-                            )
-
-                        case x: NCGeoRegion ⇒
-                            mkNote(
-                                REGION,
-                                "continent" → x.country.subContinent.continent.name,
-                                "subcontinent" → x.country.subContinent.name,
-                                "country" → x.country.name,
-                                "region" → x.name,
-                                "countrymeta" → toSerializable(x.country.meta)
-                            )
-
-                        case x: NCGeoCity ⇒
-                            mkNote(
-                                CITY,
-                                "continent" → x.region.country.subContinent.continent.name,
-                                "subcontinent" → x.region.country.subContinent.name,
-                                "country" → x.region.country.name,
-                                "region" → x.region.name,
-                                "city" → x.name,
-                                "countrymeta" → toSerializable(x.region.country.meta),
-                                "citymeta" → toSerializable(x.meta)
-                            )
-                            
-                        case _ ⇒ throw new AssertionError(s"Unexpected data: $e")
+                        ser
                     }
 
-                def addAll(locs: Set[NCGeoEntry]): Unit =
-                    for (loc ← locs) {
-                        val note = make(loc)
+                    def make(e: NCGeoEntry): NCNlpSentenceNote =
+                        e match {
+                            case x: NCGeoMetro ⇒
+                                mkNote(
+                                    METRO,
+                                    "metro" → x.name
+                                )
+
+                            case x: NCGeoContinent ⇒
+                                mkNote(
+                                    CONTINENT,
+                                    "continent" → x.name
+                                )
+
+                            case x: NCGeoSubContinent ⇒
+                                mkNote(
+                                    SUBCONTINENT,
+                                    "continent" → x.continent.name,
+                                    "subcontinent" → x.name
+                                )
+
+                            case x: NCGeoCountry ⇒
+                                mkNote(
+                                    COUNTRY,
+                                    "continent" → x.subContinent.continent.name,
+                                    "subcontinent" → x.subContinent.name,
+                                    "country" → x.name,
+                                    "countrymeta" → toSerializable(x.meta)
+                                )
+
+                            case x: NCGeoRegion ⇒
+                                mkNote(
+                                    REGION,
+                                    "continent" → x.country.subContinent.continent.name,
+                                    "subcontinent" → x.country.subContinent.name,
+                                    "country" → x.country.name,
+                                    "region" → x.name,
+                                    "countrymeta" → toSerializable(x.country.meta)
+                                )
+
+                            case x: NCGeoCity ⇒
+                                mkNote(
+                                    CITY,
+                                    "continent" → x.region.country.subContinent.continent.name,
+                                    "subcontinent" → x.region.country.subContinent.name,
+                                    "country" → x.region.country.name,
+                                    "region" → x.region.name,
+                                    "city" → x.name,
+                                    "countrymeta" → toSerializable(x.region.country.meta),
+                                    "citymeta" → toSerializable(x.meta)
+                                )
+
+                            case _ ⇒ throw new AssertionError(s"Unexpected data: $e")
+                        }
 
-                        toks.foreach(t ⇒ t.add(note))
+                    def addAll(locs: Set[NCGeoEntry]): Unit =
+                        for (loc ← locs) {
+                            val note = make(loc)
 
-                        // Other types(JJ etc) and quoted word are not re-marked.
-                        toks.filter(t ⇒ !NCPennTreebank.NOUNS_POS.contains(t.pos) && t.pos != "FW").
-                            foreach(t ⇒ ns.fixNote(t.getNlpNote, "pos" → NCPennTreebank.SYNTH_POS))
-                    }
+                            toks.foreach(t ⇒ t.add(note))
 
-                LOCATIONS.get(toks.map(_.normText).mkString(" ")) match {
-                    case Some(locs) ⇒
-                        // If multiple token match - add it.
-                        if (toks.length > 1)
-                            addAll(locs)
-                        else {
-                            // Only one token - toks.length == 1
-                            val t = toks.head
-
-                            // If LOCATION or noun - add it.
-                            if (NCPennTreebank.NOUNS_POS.contains(t.pos))
+                            // Other types(JJ etc) and quoted word are not re-marked.
+                            toks.filter(t ⇒ !NCPennTreebank.NOUNS_POS.contains(t.pos) && t.pos != "FW").
+                                foreach(t ⇒ ns.fixNote(t.getNlpNote, "pos" → NCPennTreebank.SYNTH_POS))
+                        }
+
+                    locations.get(toks.map(_.normText).mkString(" ")) match {
+                        case Some(locs) ⇒
+                            // If multiple token match - add it.
+                            if (toks.length > 1)
                                 addAll(locs)
-                            // If US state - add it.
-                            else
-                            // For now - simply ignore abbreviations for US states that
-                            // conflict with commonly used English words. User will have to
-                            // use full names.
-                            if (!isConflictName(t.origText)) {
-                                def isTopCity(g: NCGeoCity): Boolean = {
-                                    val name = glue(g.name, g.region.name)
-
-                                    TOP_USA.contains(name) || TOP_WORLD.contains(name)
-                                }
+                            else {
+                                // Only one token - toks.length == 1
+                                val t = toks.head
+
+                                // If LOCATION or noun - add it.
+                                if (NCPennTreebank.NOUNS_POS.contains(t.pos))
+                                    addAll(locs)
+                                // If US state - add it.
+                                else
+                                // For now - simply ignore abbreviations for US states that
+                                // conflict with commonly used English words. User will have to
+                                // use full names.
+                                if (!isConflictName(t.origText)) {
+                                    def isTopCity(g: NCGeoCity): Boolean = {
+                                        val name = glue(g.name, g.region.name)
+
+                                        topUsa.contains(name) || topWorld.contains(name)
+                                    }
 
-                                addAll(locs.collect {
-                                    case g: NCGeoContinent ⇒ g
-                                    case g: NCGeoSubContinent ⇒ g
-                                    case g: NCGeoCountry ⇒ g
-                                    case g: NCGeoMetro ⇒ g
-                                    case g: NCGeoRegion if g.country.name == "united states" ⇒ g
-                                    case g: NCGeoCity if isTopCity(g) ⇒ g
-                                })
+                                    addAll(locs.collect {
+                                        case g: NCGeoContinent ⇒ g
+                                        case g: NCGeoSubContinent ⇒ g
+                                        case g: NCGeoCountry ⇒ g
+                                        case g: NCGeoMetro ⇒ g
+                                        case g: NCGeoRegion if g.country.name == "united states" ⇒ g
+                                        case g: NCGeoCity if isTopCity(g) ⇒ g
+                                    })
+                                }
+                                // In all other cases - ignore one-token match.
                             }
-                            // In all other cases - ignore one-token match.
-                        }
-                    case None ⇒
-                        // Case sensitive synonyms.
-                        LOCATIONS.get(toks.map(_.origText).mkString(" ")) match {
-                            case Some(locs) ⇒ addAll(locs)
-                            case None ⇒
-                                // If there is no direct match try to convert JJs to NNs and re-check
-                                // for a possible match, e.g. "american" ⇒ "america".
-                                if (toks.size == 1) {
-                                    val tok = toks.head
-
-                                    if (NCPennTreebank.JJS_POS.contains(tok.pos)) {
-                                        var endLoop = false
-
-                                        for (noun ← NCWordNetManager.getNNsForJJ(tok.normText); if !endLoop) {
-                                            def onResult(locs: Set[NCGeoEntry]): Unit = {
-                                                addAll(locs)
-                                                endLoop = true
-                                            }
-
-                                            LOCATIONS.get(noun) match {
-                                                case Some(locs) ⇒ onResult(locs)
-                                                case None ⇒
-                                                    LOCATIONS.get(noun.toLowerCase) match {
-                                                        case Some(locs) ⇒ onResult(locs)
-                                                        case None ⇒ // No-op.
-                                                    }
+                        case None ⇒
+                            // Case sensitive synonyms.
+                            locations.get(toks.map(_.origText).mkString(" ")) match {
+                                case Some(locs) ⇒ addAll(locs)
+                                case None ⇒
+                                    // If there is no direct match try to convert JJs to NNs and re-check
+                                    // for a possible match, e.g. "american" ⇒ "america".
+                                    if (toks.size == 1) {
+                                        val tok = toks.head
+
+                                        if (NCPennTreebank.JJS_POS.contains(tok.pos)) {
+                                            var endLoop = false
+
+                                            for (noun ← NCWordNetManager.getNNsForJJ(tok.normText); if !endLoop) {
+                                                def onResult(locs: Set[NCGeoEntry]): Unit = {
+                                                    addAll(locs)
+                                                    endLoop = true
+                                                }
+
+                                                locations.get(noun) match {
+                                                    case Some(locs) ⇒ onResult(locs)
+                                                    case None ⇒
+                                                        locations.get(noun.toLowerCase) match {
+                                                            case Some(locs) ⇒ onResult(locs)
+                                                            case None ⇒ // No-op.
+                                                        }
+                                                }
                                             }
                                         }
                                     }
-                                }
-                        }
+                            }
+                    }
                 }
-            }
 
-            collapse(ns)
+                collapse(ns)
+            }
         }
 
     private def getValue(note: NCNlpSentenceNote, key: String): String = note(key).asInstanceOf[String]
@@ -313,7 +331,7 @@ object NCGeoEnricher extends NCServerEnricher {
         val excls = new mutable.HashSet[NCNlpSentenceNote]() ++ getGeoNotes(ns).filter(note ⇒ {
             val kind = extractKind(note)
 
-            COMMONS.get(kind) match {
+            commons.get(kind) match {
                 // GEO is common word defined directly or via synonym.
                 case Some(cs) ⇒
                     cs.contains(getName(kind, note)) ||
@@ -416,9 +434,9 @@ object NCGeoEnricher extends NCServerEnricher {
                 case CITY ⇒
                     val cityReg = glue(get("city"), get("region"))
 
-                    if (TOP_WORLD.contains(cityReg))
+                    if (topWorld.contains(cityReg))
                         2
-                    else if (TOP_USA.contains(cityReg))
+                    else if (topUsa.contains(cityReg))
                         1
                     else
                         0
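
Taken together, the enricher changes replace eagerly initialized vals with @volatile fields that are populated in start(), cleared in stop(), and null-guarded at the point of use, so the server can still start when GEO data is absent. A minimal sketch of that lifecycle shape (names are illustrative, not the NCGeoEnricher API):

    object LifecycleSketch {
        // Loaded in start(); may stay null if the optional data is not packaged.
        @volatile private var locations: Map[String, String] = _

        def start(): Unit =
            locations = Map("moscow" -> "russia", "boston" -> "united states") // Real code loads from resources.

        def stop(): Unit =
            locations = null // Release state on shutdown.

        def enrich(text: String): Option[String] =
            if (locations == null) None // Guard: started without data, or not started at all.
            else locations.get(text.toLowerCase)
    }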
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
index 0782359..5f9f0e6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
@@ -28,12 +28,10 @@ import scala.collection._
   */
 object NCSpellCheckManager extends NCService {
     case class Record(correct: String, misspellings: Seq[String])
+
+    private final val PATH = "spell/dictionary.json"
     
-    private val dict: Map[String, String] = (
-        for (rec ← NCJson.extractResource[List[Record]]("spell/dictionary.json", ignoreCase = true)) yield {
-            for (v ← rec.misspellings) yield v → rec.correct
-        })
-        .flatten.toMap
+    @volatile private var dict: Map[String, String] = _
     
     private def isWordUpper(s: String): Boolean = s.forall(_.isUpper)
     private def isHeadUpper(s: String): Boolean = s.head.isUpper
@@ -47,10 +45,22 @@ object NCSpellCheckManager extends NCService {
             s // Full lower case by default.
     
     override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ ⇒
+        NCJson.extractResourceOpt[List[Record]](PATH, ignoreCase = true) match {
+            case Some(recs) ⇒
+                dict = (for (rec ← recs) yield { for (v ← rec.misspellings) yield v → rec.correct } ).flatten.toMap
+            case None ⇒
+                // TODO: warning text.
+                logger.warn(s"Data not found for some reasons: $PATH")
+
+                dict = Map.empty
+        }
+
         super.start()
     }
     
     override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ ⇒
+        dict = null
+
         super.stop()
     }
     
@@ -62,15 +72,16 @@ object NCSpellCheckManager extends NCService {
       *
       * @param in Word to check.
       */
-    def check(in: String): String = dict.get(in.toLowerCase) match {
-        case None ⇒ in
-        case Some(out) ⇒
-            val inSeq = split(in)
-            val outSeq = split(out)
-            
-            if (inSeq.lengthCompare(outSeq.size) == 0)
-                outSeq.zip(inSeq).map(p ⇒ processCase(p._1, p._2)).mkString(" ")
-            else
-                processCase(out, in)
-    }
+    def check(in: String): String =
+        dict.get(in.toLowerCase) match {
+            case None ⇒ in
+            case Some(out) ⇒
+                val inSeq = split(in)
+                val outSeq = split(out)
+
+                if (inSeq.lengthCompare(outSeq.size) == 0)
+                    outSeq.zip(inSeq).map(p ⇒ processCase(p._1, p._2)).mkString(" ")
+                else
+                    processCase(out, in)
+        }
 }


[incubator-nlpcraft] 02/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-85
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit cc875293d8bad2212c074ccf4cbbea15d968602e
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Jun 23 13:30:50 2020 +0300

    WIP.
---
 .../server/nlp/spell/NCSpellCheckManager.scala     | 41 ++++++++++++----------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
index 5f9f0e6..fdb1f63 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
@@ -19,6 +19,7 @@ package org.apache.nlpcraft.server.nlp.spell
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common.NCService
+import org.apache.nlpcraft.common.util.NCUtils
 import org.apache.nlpcraft.server.json.NCJson
 
 import scala.collection._
@@ -45,15 +46,14 @@ object NCSpellCheckManager extends NCService {
             s // Full lower case by default.
     
     override def start(parent: Span): NCService = startScopedSpan("start", parent) { _ ⇒
-        NCJson.extractResourceOpt[List[Record]](PATH, ignoreCase = true) match {
-            case Some(recs) ⇒
-                dict = (for (rec ← recs) yield { for (v ← rec.misspellings) yield v → rec.correct } ).flatten.toMap
-            case None ⇒
-                // TODO: warning text.
-                logger.warn(s"Data not found for some reasons: $PATH")
-
-                dict = Map.empty
-        }
+        if (NCUtils.hasResource(PATH))
+            dict =
+                (for (rec ← NCJson.extractResource[List[Record]](PATH, ignoreCase = true)) yield {
+                    for (v ← rec.misspellings) yield v → rec.correct
+                }).flatten.toMap
+        else
+            // TODO: warning text.
+            logger.warn(s"Data not found for some reasons: $PATH")
 
         super.start()
     }
@@ -73,15 +73,18 @@ object NCSpellCheckManager extends NCService {
       * @param in Word to check.
       */
     def check(in: String): String =
-        dict.get(in.toLowerCase) match {
-            case None ⇒ in
-            case Some(out) ⇒
-                val inSeq = split(in)
-                val outSeq = split(out)
+        if (dict == null)
+            in
+        else
+            dict.get(in.toLowerCase) match {
+                case None ⇒ in
+                case Some(out) ⇒
+                    val inSeq = split(in)
+                    val outSeq = split(out)
 
-                if (inSeq.lengthCompare(outSeq.size) == 0)
-                    outSeq.zip(inSeq).map(p ⇒ processCase(p._1, p._2)).mkString(" ")
-                else
-                    processCase(out, in)
-        }
+                    if (inSeq.lengthCompare(outSeq.size) == 0)
+                        outSeq.zip(inSeq).map(p ⇒ processCase(p._1, p._2)).mkString(" ")
+                    else
+                        processCase(out, in)
+            }
 }
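
The final check() does a lowercase dictionary lookup, then re-applies the original token casing word by word (all-caps stays all-caps, a leading capital is preserved). A self-contained sketch of that case-preserving replacement, with an illustrative dictionary rather than spell/dictionary.json:

    object SpellSketch {
        // Misspelling -> correction, keyed in lower case as in the manager above.
        private val dict = Map("teh" -> "the", "nwe york" -> "new york")

        private def split(s: String): Seq[String] = s.split(" ").toSeq.filter(_.nonEmpty)

        // Re-apply the casing style of the original token to the corrected one.
        private def processCase(corrected: String, original: String): String =
            if (original.forall(_.isUpper)) corrected.toUpperCase
            else if (original.head.isUpper) corrected.capitalize
            else corrected

        def check(in: String): String =
            dict.get(in.toLowerCase) match {
                case None => in
                case Some(out) =>
                    val inSeq = split(in)
                    val outSeq = split(out)

                    if (inSeq.size == outSeq.size)
                        outSeq.zip(inSeq).map { case (o, i) => processCase(o, i) }.mkString(" ")
                    else
                        processCase(out, in)
            }
    }

For example, check("Teh") yields "The" and check("NWE YORK") yields "NEW YORK" under this sketch.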


[incubator-nlpcraft] 03/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-85
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 322e4673bbaa8e11be88b02ab68f68a8566b24a9
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Jun 23 13:35:56 2020 +0300

    WIP.
---
 .../main/scala/org/apache/nlpcraft/examples/weather/WeatherTest.java   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/weather/WeatherTest.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/weather/WeatherTest.java
index 6bd0949..1e8a30a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/weather/WeatherTest.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/weather/WeatherTest.java
@@ -19,6 +19,7 @@ package org.apache.nlpcraft.examples.weather;
 
 import com.google.gson.Gson;
 import com.google.gson.reflect.TypeToken;
+import jdk.nashorn.internal.ir.annotations.Ignore;
 import org.apache.nlpcraft.common.NCException;
 import org.apache.nlpcraft.model.tools.test.NCTestClient;
 import org.apache.nlpcraft.model.tools.test.NCTestClientBuilder;
@@ -86,7 +87,9 @@ class WeatherTest {
         NCEmbeddedProbe.stop();
     }
 
+    // TODO: Uncomment and run with profile ``. Disabled by default for profile apache.
     @Test
+    @Ignore
     void test() throws NCException, IOException {
         // Empty parameter.
         assertTrue(cli.ask("").isFailed());