You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2020/12/14 02:11:37 UTC
[incubator-nlpcraft] 02/03: split-trim-filter refactoring.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 405b5ac151bc041f27fe240f0e97d2e2ac096360
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Sun Dec 13 18:00:13 2020 -0800
split-trim-filter refactoring.
---
.../nlpcraft/common/config/NCConfigurable.scala | 6 +-
.../nlpcraft/common/makro/NCMacroParser.scala | 3 +-
.../common/nlp/numeric/NCNumericGenerator.scala | 3 +-
.../common/nlp/numeric/NCNumericManager.scala | 2 +-
.../org/apache/nlpcraft/common/util/NCUtils.scala | 35 +++++++++--
.../nlpcraft/examples/sql/db/SqlBuilder.scala | 6 +-
.../nlpcraft/examples/sql/db/SqlValueLoader.scala | 4 +-
.../nlpcraft/model/tools/cmdline/NCCli.scala | 69 ++++++++++++++++++++--
.../sqlgen/impl/NCSqlModelGeneratorImpl.scala | 18 +++---
.../org/apache/nlpcraft/probe/NCProbeBoot.scala | 2 +-
.../probe/mgrs/deploy/NCDeployManager.scala | 14 ++---
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 5 +-
.../geo/tools/metro/NCGeoMetroGenerator.scala | 4 +-
.../server/nlp/enrichers/date/NCDateEnricher.scala | 2 +-
.../server/nlp/enrichers/date/NCDateParser.scala | 9 +--
.../enrichers/stopword/NCStopWordEnricher.scala | 4 +-
.../server/nlp/preproc/NCPreProcessManager.scala | 10 ++--
.../server/nlp/spell/NCSpellCheckManager.scala | 2 +-
.../org/apache/nlpcraft/server/sql/NCSql.scala | 2 +-
.../apache/nlpcraft/server/sql/NCSqlManager.scala | 2 +-
.../server/sugsyn/NCSuggestSynonymManager.scala | 2 +-
.../intent/impl/NCIntentSolverEngineSpec.scala | 3 +-
.../sqlgen/impl/NCSqlModelGeneratorImplSpec.scala | 5 +-
23 files changed, 152 insertions(+), 60 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
index 222286d..5df0d27 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.common.config
import com.typesafe.config.{Config, ConfigFactory}
import com.typesafe.scalalogging.LazyLogging
-import org.apache.nlpcraft.common.NCE
+import org.apache.nlpcraft.common._
import scala.collection.JavaConverters._
@@ -256,7 +256,7 @@ trait NCConfigurable {
*
* @param s
*/
- private def parseCsv(s: String): Seq[String] = s.split(",").map(_.trim).filter(_.nonEmpty)
+ private def parseCsv(s: String): Seq[String] = U.splitTrimFilter(s,",")
}
object NCConfigurable extends LazyLogging {
@@ -332,7 +332,7 @@ object NCConfigurable extends LazyLogging {
else
cfg = ConfigFactory.load(tmpCfg)
- val lines = cfg.origin().description().split(",").drop(1).distinct
+ val lines = U.splitTrimFilter(cfg.origin().description(),",").drop(1).distinct
logger.info(s"NLPCraft configuration successfully loaded as a merge of: ${lines.mkString("\n + ", "\n + ", "")}")
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
index 951dddd..fe13ce8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
@@ -293,8 +293,7 @@ class NCMacroParser {
}
// Trims all duplicate spaces.
- private def trimDupSpaces(s: String) =
- s.split(" ").map(_.trim).filter(_.nonEmpty).mkString(" ")
+ private def trimDupSpaces(s: String) = U.splitTrimFilter(s, " ").mkString(" ")
// Processes '\' escapes for '{', '}', '|', and '*'.
private def processEscapes(s: String): String = {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala
index b0860d4..d404e8f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala
@@ -18,6 +18,7 @@
package org.apache.nlpcraft.common.nlp.numeric
import java.text.DecimalFormat
+import org.apache.nlpcraft.common._
/**
* Converts numbers to their textual (word) presentation.
@@ -105,7 +106,7 @@ object NCNumericGenerator {
val n10x1 = convertSmall(s10x1)
- (n10x9 + n10x6 + n10x3 + n10x1).split(" ").filter(!_.isEmpty).mkString(" ")
+ U.normalize(n10x9 + n10x6 + n10x3 + n10x1," ")
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
index be9bed3..4fa8ef5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala
@@ -136,7 +136,7 @@ object NCNumericManager extends NCService {
ackStarting()
genNums = mapResource("numeric/numeric.txt", "utf-8", logger, {
- _.filter(s ⇒ !s.isEmpty && !s.trim.startsWith("#")).
+ _.filter(s ⇒ s.nonEmpty && !s.trim.startsWith("#")).
map(_.split("=")).
map(s ⇒ (s(1), s(0).toInt)).
toMap
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 854b642..a4f614e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -34,7 +34,6 @@ import java.util.regex.Pattern
import java.util.stream.Collectors
import java.util.zip.{ZipInputStream, GZIPInputStream ⇒ GIS, GZIPOutputStream ⇒ GOS}
import java.util.{Locale, Properties, Random, Timer, TimerTask, Calendar ⇒ C}
-
import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.core.`type`.TypeReference
import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
@@ -52,10 +51,10 @@ import org.apache.nlpcraft.common.version.NCVersion
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import resource._
+
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
-
import scala.collection.JavaConverters._
import scala.collection._
import scala.concurrent.ExecutionContext.Implicits.global
@@ -158,6 +157,34 @@ object NCUtils extends LazyLogging {
ANSI_SEQ.matcher(s).replaceAll("")
/**
+ * Trims each string in the given sequence and filters out the empty ones.
+ *
+ * @param s Sequence of strings to process.
+ * @return
+ */
+ def trimFilter(s: Seq[String]): Seq[String] =
+ s.map(_.trim).filter(_.nonEmpty)
+
+ /**
+ * Splits, trims and filters empty strings for the given string.
+ *
+ * @param s String to split.
+ * @param sep Separator (regex) to split by.
+ * @return
+ */
+ def splitTrimFilter(s: String, sep: String): Seq[String] =
+ trimFilter(s.split(sep))
+
+ /**
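+ * Splits the string by `sep`, trims the parts, drops empty ones and joins the rest back with `sep`.
+ * @param s String to normalize.
+ * @param sep Separator; used as the split regex and, verbatim, as the join string.
+ * @return Normalized string.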
+ */
+ def normalize(s: String, sep: String): String =
+ splitTrimFilter(s, sep).mkString(sep)
+
+ /**
* Escapes given string for JSON according to RFC 4627 http://www.ietf.org/rfc/rfc4627.txt.
*
* @param s String to escape.
@@ -431,7 +458,7 @@ object NCUtils extends LazyLogging {
* @return
*/
private def readLcTrimFilter(in: BufferedSource): List[String] =
- in.getLines().map(_.toLowerCase.trim).filter(s ⇒ !s.isEmpty && !s.startsWith("#")).toList
+ in.getLines().map(_.toLowerCase.trim).filter(s ⇒ s.nonEmpty && !s.startsWith("#")).toList
/**
* Reads lines from given file converting to lower case, trimming, and filtering
@@ -1145,7 +1172,7 @@ object NCUtils extends LazyLogging {
*
* @param s String to check.
*/
- def neon(s: String): Boolean = s != null && !s.isEmpty
+ def neon(s: String): Boolean = s != null && s.nonEmpty
/**
* Generates (relatively) unique ID good for a short-term usage.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala
index f75a0b4..6b56cac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.examples.sql.db
import java.sql.Types
import com.typesafe.scalalogging.LazyLogging
+import org.apache.nlpcraft.common._
import org.apache.nlpcraft.model.tools.sqlgen.NCSqlJoinType._
import org.apache.nlpcraft.model.tools.sqlgen._
import org.apache.nlpcraft.model.tools.sqlgen.impl.NCSqlSortImpl
@@ -490,7 +491,7 @@ case class SqlBuilder(schema: NCSqlSchema) extends LazyLogging {
val extSorts = extendSort(sortsNorm, tblsNorm, extCols)
SqlQuery(
- sql =
+ sql = U.normalize(
s"""
|SELECT
| ${if (distinct) "DISTINCT" else ""}
@@ -499,7 +500,8 @@ case class SqlBuilder(schema: NCSqlSchema) extends LazyLogging {
| ${if (extConds.isEmpty) "" else s"WHERE ${extConds.mkString(" AND ")}"}
| ${if (extSorts.isEmpty) "" else s"ORDER BY ${extSorts.map(sql).mkString(", ")}"}
| LIMIT ${limit.flatMap(p ⇒ Some(p.getLimit)).getOrElse(DFLT_LIMIT)}
- |""".stripMargin.split(" ").map(_.trim).filter(_.nonEmpty).mkString(" "),
+ |""".stripMargin, " "
+ ),
parameters = extParams
)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala
index 4c270e7..353107a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala
@@ -40,8 +40,8 @@ class SqlValueLoader extends NCValueLoader with LazyLogging {
SqlAccess.select(SqlQuery(s"SELECT $col FROM $tab WHERE $col IS NOT NULL", Seq.empty), logResult = false).
rows.
map(_.head).
- map(_.toString.trim).
- filter(!_.isEmpty).
+ map(_.trim).
+ filter(_.nonEmpty).
map(
v ⇒ new NCValue {
override def getName: String = v
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
index 2522670..82e0d0c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala
@@ -1210,7 +1210,7 @@ object NCCli extends App {
value = Some("path"),
optional = true,
desc =
- s"Additional JVM classpath component that will be appended to the default NLPCraft JVM classpath. " +
+ s"Additional JVM classpath that will be appended to the default NLPCraft JVM classpath. " +
s"Although this configuration property is optional, when deploying your own models you must " +
s"provide this additional classpath for the models and their dependencies this probe will be hosting. " +
s"NOTE: this is only optional if you are running example models shipped with NLPCraft."
@@ -1273,6 +1273,58 @@ object NCCli extends App {
)
),
Command(
+ name = "test-model",
+ group = "3. Miscellaneous",
+ synopsis = s"Runs auto model validation.",
+ desc = Some(
+ s"Runs ${y("'NCTestAutoModelValidator'")} model auto-validator for given models."
+ ),
+ body = cmdTestModel,
+ params = Seq(
+ Parameter(
+ id = "cp",
+ names = Seq("--cp", "-p"),
+ value = Some("path"),
+ optional = true,
+ desc =
+ s"Additional JVM classpath that will be appended to the default NLPCraft JVM classpath. " +
+ s"Although this configuration property is optional, when testing your own models you must " +
+ s"provide this additional classpath for the models and their dependencies. " +
+ s"NOTE: this is only optional if you are testing example models shipped with NLPCraft."
+ ),
+ Parameter(
+ id = "models",
+ names = Seq("--models", "-m"),
+ value = Some("<model list>"),
+ desc =
+ s"Comma separated list of fully qualified class names for models to test. NOTE: if you provide " +
+ s"the list of your own models here - you must also provide the additional classpath " +
+ s"for them via ${y("--cp")} parameter."
+ ),
+ Parameter(
+ id = "jvmopts",
+ names = Seq("--jvm-opts", "-j"),
+ value = Some("<jvm flags>"),
+ optional = true,
+ desc =
+ s"Space separated list of JVM flags to use. If not provided, the default ${y("'-ea -Xms1024m'")} flags " +
+ s"will be used."
+ )
+ ),
+ examples = Seq(
+ Example(
+ usage = Seq(
+ s"$PROMPT $SCRIPT_NAME test-model ",
+ " --models=my.package.Model ",
+ " --cp=/opt/target/classes ",
+ " --jvm-opts=\"-ea -Xms2048m\""
+ ),
+ desc =
+ s"Runs model auto-validator for ${y("'my.package.Model'")} model."
+ )
+ )
+ ),
+ Command(
name = "info-server",
group = "1. Server & Probe Commands",
synopsis = s"Info about local server.",
@@ -1659,7 +1711,7 @@ object NCCli extends App {
case None ⇒ 2 // Default.
}
val jvmOpts = args.find(_.parameter.id == "jvmopts") match {
- case Some(arg) ⇒ stripQuotes(arg.value.get).split(" ").map(_.trim).filter(_.nonEmpty).toSeq
+ case Some(arg) ⇒ U.splitTrimFilter(stripQuotes(arg.value.get), " ")
case None ⇒ Seq("-ea", "-Xms2048m", "-XX:+UseG1GC")
}
@@ -1861,6 +1913,15 @@ object NCCli extends App {
* @param args Arguments, if any, for this command.
* @param repl Whether or not running from REPL.
*/
+ private def cmdTestModel(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = {
+
+ }
+
+ /**
+ * @param cmd Command descriptor.
+ * @param args Arguments, if any, for this command.
+ * @param repl Whether or not running from REPL.
+ */
private def cmdStartProbe(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = {
// Ensure that there is a local server running since probe
// cannot finish its start unless there's a server to connect to.
@@ -1889,7 +1950,7 @@ object NCCli extends App {
case None ⇒ null
}
val jvmOpts = args.find(_.parameter.id == "jvmopts") match {
- case Some(arg) ⇒ stripQuotes(arg.value.get).split(" ").map(_.trim).filter(_.nonEmpty).toSeq
+ case Some(arg) ⇒ U.splitTrimFilter(stripQuotes(arg.value.get), " ")
case None ⇒ Seq("-ea", "-Xms1024m")
}
@@ -2683,7 +2744,7 @@ object NCCli extends App {
tbl += (" Pool increment", s"${g(beacon.dbPoolInc)}")
tbl += (" Reset on start", s"${g(beacon.dbInit)}")
tbl += ("REST:", "")
- tbl += (" Endpoint", s"http://${g(beacon.restEndpoint)}") // TODO: https?
+ tbl += (" Endpoint", s"${g("http://" + beacon.restEndpoint)}") // TODO: https?
tbl += (" API provider", s"${g(beacon.restApi)}")
tbl += ("Probe:", "")
tbl += (" Uplink", s"${g(beacon.upLink)}")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala
index 3783925..bba6708 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala
@@ -57,7 +57,7 @@ object NCSqlModelGeneratorImpl {
val nameLc: String
val elmNameLc: String
- private val nameWs = elmNameLc.replaceAll("_", " ").split(" ").filter(_.nonEmpty).mkString(" ")
+ private val nameWs = U.normalize(elmNameLc.replaceAll("_"," ")," ")
lazy val synonym =
if (elmNameLc == nameWs)
@@ -144,7 +144,7 @@ object NCSqlModelGeneratorImpl {
* @return
*/
private def mkPrefixFun(s: String): String ⇒ String = {
- val arr = s.split(",").map(_.trim).filter(_.nonEmpty)
+ val arr = U.splitTrimFilter(s, ",")
z ⇒ (for (fix ← arr if z.startsWith(fix)) yield z.substring(fix.length)).headOption.getOrElse(z)
}
@@ -156,7 +156,7 @@ object NCSqlModelGeneratorImpl {
* @return
*/
private def mkSuffixFun(s: String): String ⇒ String = {
- val arr = s.split(",").map(_.trim).filter(_.nonEmpty)
+ val arr = U.splitTrimFilter(s, ",")
z ⇒ (for (fix ← arr if z.endsWith(fix)) yield z.substring(0, z.length - fix.length)).headOption.getOrElse(z)
}
@@ -168,12 +168,12 @@ object NCSqlModelGeneratorImpl {
*/
private def mkPredicate(s: String): (String, String) ⇒ Boolean = {
def convert(expr: String): (String, String) ⇒ Boolean = {
- val s = expr.split("#").filter(!_.isEmpty)
+ val s = U.splitTrimFilter(expr, "#")
val (tbl: String, col: String) = s.length match {
- case 1 if !expr.contains("#") ⇒ (s(0), "") // 'table'
- case 1 if expr.contains("#") ⇒ ("", s(0)) // '#column'
- case 2 ⇒ (s(0), s(1)) // 'table#column'
+ case 1 if !expr.contains("#") ⇒ (s.head, "") // 'table'
+ case 1 if expr.contains("#") ⇒ ("", s.head) // '#column'
+ case 2 ⇒ (s.head, s(1)) // 'table#column'
case _ ⇒ throw new Exception(s"Invalid table and/or column filter: $C$expr$RST")
}
@@ -203,7 +203,7 @@ object NCSqlModelGeneratorImpl {
}
}
- val predicates = s.split(";").map(_.trim()).map(convert)
+ val predicates = U.splitTrimFilter(s,";").map(convert)
(tbl: String, col: String) ⇒ predicates.exists(_(tbl, col))
}
@@ -235,7 +235,7 @@ object NCSqlModelGeneratorImpl {
* @return
*/
private def removeSeqDups(syn: String): String = {
- val words = syn.split(" ").filter(_.nonEmpty)
+ val words = U.splitTrimFilter(syn, " ")
words
.zip(words.map(NCNlpPorterStemmer.stem))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index e8ec054..3aab814 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -85,7 +85,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace {
) {
lazy val upLinkString = s"${upLink._1}:${upLink._2}"
lazy val downLinkString = s"${downLink._1}:${downLink._2}"
- lazy val modelsSeq: Seq[String] = models.split(",").map(_.trim)
+ lazy val modelsSeq: Seq[String] = U.splitTrimFilter(models,",")
}
private def mkDefault(): Config = {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index d7fb055..8e7f0b7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -30,7 +30,7 @@ import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.common.config.NCConfigurable
import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreManager, NCNlpPorterStemmer}
-import org.apache.nlpcraft.common.util.NCUtils.{DSL_FIX, REGEX_FIX, escapeJson}
+import org.apache.nlpcraft.common.util.NCUtils.{DSL_FIX, REGEX_FIX}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.factories.basic.NCBasicModelFactory
import org.apache.nlpcraft.model.intent.impl.{NCIntentDslCompiler, NCIntentSolver}
@@ -255,10 +255,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
var curr = 0
val len = x.length - (2 + 2) // 2 is a prefix/suffix length. Hack...
- def splitUp(s: String): Seq[String] = s.split(" ").map(_.trim).filter(_.nonEmpty).toSeq
-
def processChunk(fix: String): Unit = {
- chunks ++= splitUp(x.substring(start, curr))
+ chunks ++= U.splitTrimFilter(x.substring(start, curr), " ")
x.indexOf(fix, curr + fix.length) match {
case -1 ⇒
@@ -286,7 +284,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
curr += 1
}
- chunks ++= splitUp(x.substring(start))
+ chunks ++= U.splitTrimFilter(x.substring(start), " ")
chunks.map(mkChunk(mdlId, _))
}
@@ -627,7 +625,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
case None ⇒ // No-op.
}
- data ++= Config.models.split(",").map(_.trim).map(makeModelWrapper)
+ data ++= U.splitTrimFilter(Config.models, ",").map(makeModelWrapper)
Config.jarsFolder match {
case Some(jarsFolder) ⇒
@@ -790,7 +788,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
s"mdlId=${mdl.getId}, " +
s"elm=${elm.toString}" +
s"]")
- else if (elm.getId.length == 0)
+ else if (elm.getId.isEmpty)
throw new NCE(s"Model element ID cannot be empty [" +
s"mdlId=${mdl.getId}, " +
s"elm=${elm.toString}]" +
@@ -932,7 +930,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
if (startsAndEnds(REGEX_FIX, chunk)) {
val ptrn = stripSuffix(REGEX_FIX, chunk)
- if (ptrn.length > 0)
+ if (ptrn.nonEmpty)
try
NCProbeSynonymChunk(kind = REGEX, origText = chunk, regex = Pattern.compile(ptrn))
catch {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 43c3748..9e57605 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -20,11 +20,12 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit
import java.io.Serializable
import io.opencensus.trace.Span
+import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.numeric.{NCNumeric, NCNumericManager}
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
-import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.common.NCService
import org.apache.nlpcraft.probe.mgrs.NCProbeModel
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
@@ -194,7 +195,7 @@ object NCLimitEnricher extends NCProbeEnricher {
limits= {
// Few numbers cannot be in on template.
- require(SYNONYMS.forall(_.split(" ").map(_.trim).count(_ == CD) < 2))
+ require(SYNONYMS.forall(s ⇒ U.splitTrimFilter(s, " ").count(_ == CD) < 2))
def toMacros(seq: Iterable[String]): String = seq.mkString("|")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala
index 35616dc..8093576 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala
@@ -40,10 +40,10 @@ object NCGeoMetroGenerator extends App {
case class Holder(name: String)
private def deleteBrackets(s: String): String =
- s.replaceAll("\\(", " ").replaceAll("\\)", " ").split(" ").map(_.trim).filter(_.nonEmpty).mkString(" ")
+ U.normalize(s.replaceAll("\\(", " ").replaceAll("\\)", " "), " ")
private def generate() {
- val lines = U.readPath(in, "UTF-8").toSeq.map(_.trim).filter(_.nonEmpty)
+ val lines = U.readPath(in, "UTF-8").map(_.trim).filter(_.nonEmpty)
// Skips header.
val metro = lines.tail.filter(!_.contains("(not set)")).map(line ⇒ Holder(line.takeWhile(_ != ',')))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index 2070ef9..44b8a49 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -57,7 +57,7 @@ object NCDateEnricher extends NCServerEnricher {
// Preposition data holder.
case class P(text: String) {
- val words: Seq[String] = text.split(" ").filter(!_.trim.isEmpty).toSeq
+ val words: Seq[String] = U.splitTrimFilter(text," ")
val length: Int = words.length
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala
index 3abfa49..bb5daea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala
@@ -19,6 +19,7 @@ package org.apache.nlpcraft.server.nlp.enrichers.date
import java.util.{Locale, Calendar ⇒ C}
import scala.collection.JavaConverters._
+import org.apache.nlpcraft.common._
/**
* Date parser.
@@ -344,7 +345,7 @@ object NCDateParser {
}).getTimeInMillis
}
- private def parseInt(s: String): Option[Int] = if (!s.isEmpty) Some(s.toInt) else None
+ private def parseInt(s: String): Option[Int] = if (s.nonEmpty) Some(s.toInt) else None
private def isSign(ch: Char) = ch == '+' || ch == '-'
@@ -364,7 +365,7 @@ object NCDateParser {
private[date] def calculatePart(fns: String, base: Long): PartResult = {
var res = PartResult(base, base, "", Seq.empty[String])
- for (fn ← fns.split(",").map(_.trim)) {
+ for (fn ← U.splitTrimFilter(fns, ",")) {
val resFrom = res.from
def after(heads: String*): String = fn.drop(heads.map(_.length).sum)
@@ -437,7 +438,7 @@ object NCDateParser {
if (shift != 0)
shift = years - shift
- // Should't be in one function call (last day is relative)
+ // Should not be in one function call (last day is relative).
set(c, C.YEAR → (curYear + shift))
set(c, C.DAY_OF_YEAR → c.getActualMaximum(C.DAY_OF_YEAR))
})
@@ -448,7 +449,7 @@ object NCDateParser {
def ld3M(map3m: Map[Int, Int]): PartResult = lastDay((c: C) ⇒ {
val n = map3m(MONTH_NUM_MAP(c.get(C.MONTH)))
- // Should't be in one function call (last day is relative)
+ // Should not be in one function call (last day is relative).
// Note that keys in `map3m` sorted.
set(c, C.MONTH → NUM_MONTH_MAP(map3m.filter(_._2 == n).keys.toSeq.max))
set(c, C.DAY_OF_MONTH → c.getActualMaximum(C.DAY_OF_MONTH))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 71dafc7..71db333 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -273,7 +273,7 @@ object NCStopWordEnricher extends NCServerEnricher {
drop(idxPos + 1).
trim.split(" ").
map(_.trim.toUpperCase).
- filter(!_.isEmpty).
+ filter(_.nonEmpty).
toSeq.
map(p ⇒ if (p.head == '~') p.drop(1).trim → false else p → true).
toMap
@@ -678,7 +678,7 @@ object NCStopWordEnricher extends NCServerEnricher {
val m =
readStopWords(
U.readResource("stopwords/stop_words.txt", "UTF-8", logger).
- map(_.trim).filter(s ⇒ !s.isEmpty && !s.startsWith("#")).toSeq
+ map(_.trim).filter(s ⇒ s.nonEmpty && !s.startsWith("#")).toSeq
)
stopWords = m(false)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala
index 1076f3d..9d0f6f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.server.nlp.preproc
import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.NCService
+import org.apache.nlpcraft.common._
import org.apache.nlpcraft.server.nlp.spell.NCSpellCheckManager
import scala.collection._
@@ -117,20 +117,20 @@ object NCPreProcessManager extends NCService {
/**
*
- * @param sen Input sentence.
+ * @param sen Input sentence.
* @param spellCheck Spell check flag.
* @return
*/
private def collect(sen: Seq[String], spellCheck: Boolean): String =
if (spellCheck)
- sen.map(NCSpellCheckManager.check).map(_.trim).filter(!_.isEmpty).mkString(" ")
+ U.trimFilter(sen.map(NCSpellCheckManager.check)).mkString(" ")
else
- sen.map(_.trim).filter(!_.isEmpty).mkString(" ")
+ U.trimFilter(sen).mkString(" ")
/**
* Performs all pre-processing and normalizes the given input raw text.
*
- * @param rawTxt Raw text to normalize.
+ * @param rawTxt Raw text to normalize.
* @param spellCheck Using spell checking flag.
* @return Normalized, pre-processed text.
* @param parent Optional parent span.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
index 8ad7b73..02bafce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala
@@ -35,7 +35,7 @@ object NCSpellCheckManager extends NCService {
private def isWordUpper(s: String): Boolean = s.forall(_.isUpper)
private def isHeadUpper(s: String): Boolean = s.head.isUpper
- private def split(s: String): Seq[String] = s.split(" ").filter(!_.isEmpty)
+ private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ")
private def processCase(s: String, sample: String): String =
if (isWordUpper(sample))
s.toUpperCase
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala
index 585f9ed..05ba230 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala
@@ -138,7 +138,7 @@ object NCSql extends LazyLogging {
sql.replace("\n", " ").
replace("\t", " ").
split(" ").
- filter(!_.isEmpty).
+ filter(_.nonEmpty).
mkString(" ").
trim
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala
index f8ea0f2..d23a632 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala
@@ -1026,7 +1026,7 @@ object NCSqlManager extends NCService with NCIgniteInstance {
mkString("\n").
split(";").
map(_.trim).
- filter(!_.isEmpty).
+ filter(_.nonEmpty).
foreach(p ⇒ NCSql.ddl(p))
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index b15cc8e..fc17201 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -108,7 +108,7 @@ object NCSuggestSynonymManager extends NCService {
}
case class SuggestionResult(synonym: String, score: Double)
- private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty)
+ private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ")
private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala
index 85f75a5..9784972 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.model.intent.impl
+import org.apache.nlpcraft.common._
import org.apache.nlpcraft.model.intent.utils.NCDslFlowItem
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
@@ -34,7 +35,7 @@ class NCIntentSolverEngineSpec {
private def matchFlow(hist: String, flow: (String/*Intent ID*/, Int/*min*/, Int/*max*/)*): Boolean = {
NCIntentSolverEngine.matchFlow(
flow.toArray.map(x ⇒ NCDslFlowItem(x._1.split('|').map(_.trim), x._2, x._3)),
- hist.split(" ").map(_.trim)
+ U.splitTrimFilter(hist, " ")
)
}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala
index 3d75592..39b6978 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.model.tools.sqlgen.impl
+import org.apache.nlpcraft.common._
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
@@ -45,7 +46,7 @@ class NCSqlModelGeneratorImplSpec {
* @return
*/
private def mkPrefixFun(s: String): String ⇒ String = {
- val arr = s.split(",").map(_.trim).filter(_.nonEmpty)
+ val arr = U.splitTrimFilter(s, ",")
z ⇒ (for (fix ← arr if z.startsWith(fix)) yield z.substring(fix.length)).headOption.getOrElse(z)
}
@@ -57,7 +58,7 @@ class NCSqlModelGeneratorImplSpec {
* @return
*/
private def mkSuffixFun(s: String): String ⇒ String = {
- val arr = s.split(",").map(_.trim).filter(_.nonEmpty)
+ val arr = U.splitTrimFilter(s, ",")
z ⇒ (for (fix ← arr if z.endsWith(fix)) yield z.substring(0, z.length - fix.length)).headOption.getOrElse(z)
}