You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/09 08:18:39 UTC

[incubator-nlpcraft] branch NLPCRAFT-435 created (now 527493e)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-435
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 527493e  WIP.

This branch includes the following new commits:

     new 527493e  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-435
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 527493e9536d6c260ae4165b7f85354a781419a6
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 9 11:18:19 2021 +0300

    WIP.
---
 nlpcraft/src/main/resources/date/full.txt.gz       | Bin 6985934 -> 24138388 bytes
 nlpcraft/src/main/resources/date/parts.txt.gz      | Bin 8146091 -> 7563491 bytes
 nlpcraft/src/main/resources/date/parts_dmy.txt.gz  | Bin 4986446 -> 4765022 bytes
 nlpcraft/src/main/resources/date/parts_mdy.txt.gz  | Bin 4955873 -> 4744459 bytes
 nlpcraft/src/main/resources/date/parts_ymd.txt.gz  | Bin 5326821 -> 5096584 bytes
 .../org/apache/nlpcraft/common/util/NCUtils.scala  |   2 +-
 .../server/nlp/enrichers/date/NCDateEnricher.scala |  64 +++++++++++----------
 .../nlp/enrichers/date/tools/NCDateGenerator.scala |  22 +++----
 .../nlp/enrichers/date/NCEnricherDateSpec.scala    |   3 +-
 9 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/nlpcraft/src/main/resources/date/full.txt.gz b/nlpcraft/src/main/resources/date/full.txt.gz
index 37216a0..15d7718 100644
Binary files a/nlpcraft/src/main/resources/date/full.txt.gz and b/nlpcraft/src/main/resources/date/full.txt.gz differ
diff --git a/nlpcraft/src/main/resources/date/parts.txt.gz b/nlpcraft/src/main/resources/date/parts.txt.gz
index 4b7008a..9c90460 100644
Binary files a/nlpcraft/src/main/resources/date/parts.txt.gz and b/nlpcraft/src/main/resources/date/parts.txt.gz differ
diff --git a/nlpcraft/src/main/resources/date/parts_dmy.txt.gz b/nlpcraft/src/main/resources/date/parts_dmy.txt.gz
index 42d35d7..58b2792 100644
Binary files a/nlpcraft/src/main/resources/date/parts_dmy.txt.gz and b/nlpcraft/src/main/resources/date/parts_dmy.txt.gz differ
diff --git a/nlpcraft/src/main/resources/date/parts_mdy.txt.gz b/nlpcraft/src/main/resources/date/parts_mdy.txt.gz
index faeac93..834719e 100644
Binary files a/nlpcraft/src/main/resources/date/parts_mdy.txt.gz and b/nlpcraft/src/main/resources/date/parts_mdy.txt.gz differ
diff --git a/nlpcraft/src/main/resources/date/parts_ymd.txt.gz b/nlpcraft/src/main/resources/date/parts_ymd.txt.gz
index 22f38bf..da5d4ba 100644
Binary files a/nlpcraft/src/main/resources/date/parts_ymd.txt.gz and b/nlpcraft/src/main/resources/date/parts_ymd.txt.gz differ
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index d8c1900..d2c2b03 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -552,7 +552,7 @@ object NCUtils extends LazyLogging {
      * @return
      */
     private def readLcTrimFilter(in: BufferedSource): List[String] =
-        in.getLines().map(_.toLowerCase.strip).filter(s => s.nonEmpty && !s.startsWith("#")).toList
+        in.getLines().map(_.toLowerCase.strip).filter(s => s.nonEmpty && s.head!= '#').toList
 
     /**
       * Reads lines from given file converting to lower case, trimming, and filtering
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index a4e8e11..9d7a549 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -27,12 +27,9 @@ import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateConstants._
 import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateFormatType._
 
 import java.util
-import java.util.{Calendar => C}
-import java.util.{List => JList}
-
+import java.util.{Calendar => C, List => JList}
 import scala.collection.immutable.Iterable
 import scala.collection.mutable
-import scala.collection.mutable.{LinkedHashMap => LHM}
 import scala.concurrent.ExecutionContext
 import scala.jdk.CollectionConverters.ListHasAsScala
 
@@ -40,8 +37,6 @@ import scala.jdk.CollectionConverters.ListHasAsScala
   * Date enricher.
   */
 object NCDateEnricher extends NCServerEnricher {
-    private type LHM_SS = LHM[String, String]
-
     private object Config extends NCConfigurable {
         def style: NCDateFormatType = getObject("nlpcraft.server.datesFormatStyle", NCDateFormatType.withName)
     }
@@ -55,8 +50,8 @@ object NCDateEnricher extends NCServerEnricher {
     private[date] val prepsBtwIncl = mkBetweenPrepositions(BETWEEN_INCLUSIVE)
     private[date] val prepsBtwExcl = mkBetweenPrepositions(BETWEEN_EXCLUSIVE)
     
-    @volatile private var cacheFull: LHM_SS = _
-    @volatile private var cacheParts: LHM_SS = _
+    private val cacheFull = new util.HashMap[String, String]()
+    private val cacheParts = new util.HashMap[String, String]()
 
     // Preposition data holder.
     case class P(text: String) {
@@ -121,6 +116,10 @@ object NCDateEnricher extends NCServerEnricher {
      */
     override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ =>
         ackStopping()
+
+        cacheFull.clear()
+        cacheParts.clear()
+
         ackStopped()
     }
 
@@ -132,18 +131,25 @@ object NCDateEnricher extends NCServerEnricher {
     override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { span =>
         ackStarting()
 
-        def read(res: String): LHM_SS = {
+        val sep = '|'.asInstanceOf[Int]
+
+        def read(dest: util.Map[String, String], res: String): Unit =
+            U.readTextGzipResource(res, "UTF-8", logger).foreach(p => {
+                val idx = p.indexOf(sep)
+
+                // Data already trimmed.
+                dest.put(p.take(idx), p.drop(idx + 1))
+            })
+
+        def readCommon(dest: util.Map[String, String], res: String): Unit = {
             startScopedSpan("read", span, "res" -> res) { _ =>
-                val m: LHM_SS = new LHM_SS()
-             
-                val map = U.readTextGzipResource(res, "UTF-8", logger).map(p => {
-                    val idx = p.indexOf("|")
-                    p.take(idx).strip -> p.drop(idx + 1).trim
-                })
-             
-                m ++= map
-             
-                m
+                val m = new util.HashMap[String, String]()
+
+                read(m, res)
+
+                dest.synchronized {
+                    dest.putAll(m)
+                }
             }
         }
 
@@ -155,17 +161,12 @@ object NCDateEnricher extends NCServerEnricher {
             case _  => throw new AssertionError(s"Unexpected format type: ${Config.style}")
         }
 
-        var p1: LHM_SS = null
-        var p2: LHM_SS = null
-
         U.executeParallel(
-            () => cacheFull = read("date/full.txt.gz"),
-            () => p1 = read("date/parts.txt.gz"),
-            () => p2 = read(s"date/$file")
+            () => read(cacheFull, "date/full.txt.gz"),
+            () => readCommon(cacheParts, "date/parts.txt.gz"),
+            () => readCommon(cacheParts, s"date/$file")
         )
 
-        cacheParts = p1 ++ p2
-
         ackStarted()
     }
 
@@ -342,12 +343,13 @@ object NCDateEnricher extends NCServerEnricher {
                     }
 
                     cacheFull.get(s) match {
-                        case Some(body) => add(body, isFull = true)
-                        case None =>
+                        case null =>
                             cacheParts.get(s) match {
-                                case Some(body) => add(body, isFull = false)
-                                case None => // No-op.
+                                case null => // No-op.
+                                case body => add(body, isFull = false)
                             }
+
+                        case body => add(body, isFull = true)
                     }
                 }
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
index 646267c..6fbbff7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/tools/NCDateGenerator.scala
@@ -17,17 +17,16 @@
 
 package org.apache.nlpcraft.server.nlp.enrichers.date.tools
 
-import java.text.{DateFormat, SimpleDateFormat}
-import java.util.{Date, Locale, Calendar => C}
 import org.apache.nlpcraft.common._
 import org.apache.nlpcraft.common.nlp.numeric.NCNumericGenerator
 import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateConstants._
 import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateFormatType._
+import org.apache.nlpcraft.server.nlp.enrichers.date.tools.NCDateGenerator._
 
-import scala.collection.mutable.{LinkedHashMap => LHM}
-import NCDateGenerator._
-
+import java.text.{DateFormat, SimpleDateFormat}
+import java.util.{Date, Locale, Calendar => C}
 import scala.collection.mutable
+import scala.collection.mutable.{LinkedHashMap => LHM}
 
 /**
  * Pre-built date ranges generator.
@@ -106,6 +105,7 @@ object NCDateGenerator {
     private val NUM_MONTH_MAP = zipIndexes(CAL_MONTHS)
     private val MMMM_MONTH_SEQ = CAL_MONTHS.map(month)
     private val YEARS_SEQ = for (i <- 1900 to C.getInstance().get(C.YEAR) + 5) yield i
+    private val YEARS_SEQ_EXT = for (i <- 1500 to C.getInstance().get(C.YEAR) + 5) yield i
     private val MMMM_MONTH_MAP = zipIndexes(MMMM_MONTH_SEQ)
 
     // USA week.
@@ -403,7 +403,7 @@ object NCDateGenerator {
     }
 
     private[date] def years(df: LHM_SS): Unit =
-        for (y <- YEARS_SEQ)
+        for (y <- YEARS_SEQ_EXT)
             mkYears(y).foreach(s => df += s"$s" -> s"${y}y")
 
     private[date] def months(df: LHM_SS, fmts: Seq[SimpleDateFormat]): Unit = {
@@ -463,10 +463,10 @@ object NCDateGenerator {
         }
 
         // Between.
-        for ((from, to) <- BETWEEN_INCLUSIVE; y1 <- YEARS_SEQ; y2 <- YEARS_SEQ if y2 > y1)
+        for ((from, to) <- BETWEEN_INCLUSIVE; y1 <- YEARS_SEQ_EXT; y2 <- YEARS_SEQ_EXT if y2 > y1)
             addRange(from, to, y1, y2, s"${y1}y:${y2}y")
 
-        for ((from, to) <- BETWEEN_EXCLUSIVE; y1 <- YEARS_SEQ; y2 <- YEARS_SEQ if y2 > y1)
+        for ((from, to) <- BETWEEN_EXCLUSIVE; y1 <- YEARS_SEQ_EXT; y2 <- YEARS_SEQ_EXT if y2 > y1)
             addRange(from, to, y1, y2, s"${y1}y:${y2-1}y")
 
         def add(word: String, y: Int, templ: String): Unit = {
@@ -478,10 +478,10 @@ object NCDateGenerator {
         }
 
         // From.
-        for (f <- FROM; y <- YEARS_SEQ) add(f, y, toNow(s"${y}y"))
+        for (f <- FROM; y <- YEARS_SEQ_EXT) add(f, y, toNow(s"${y}y"))
 
         // Till.
-        for (t <- TO; y <- YEARS_SEQ) add(t, y, to(s"${y}y"))
+        for (t <- TO; y <- YEARS_SEQ_EXT) add(t, y, to(s"${y}y"))
     }
 
     private[date] def simpleQuarters(df: LHM_SS): Unit = {
@@ -856,7 +856,7 @@ object NCDateGenerator {
 
 object DLDateGeneratorRunner extends App {
     private def mkPath(path: String): String = U.mkPath(s"nlpcraft/src/main/resources/date/$path")
-    private def convert(entry: (String, String)): String = s"${entry._1} | ${entry._2}"
+    private def convert(entry: (String, String)): String = s"${entry._1.strip}|${entry._2.strip}"
 
     private def process(): Unit = {
         val fileFull = mkPath("full.txt")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
index 429b24c..30f9a65 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/date/NCEnricherDateSpec.scala
@@ -38,7 +38,8 @@ class NCEnricherDateSpec extends NCEnricherBaseSpec {
                 "1900 year",
                 "from 1900 year",
                 "between 1900 and 1905",
-                "between 1900 and 1905 years"
+                "between 1501 and 1905 years",
+                "after 1501 year"
             ).map(txt => {
                 val f: Unit => Unit = _ => checkExists(txt, dte(text = txt))