You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/06/01 14:27:15 UTC
[incubator-nlpcraft] 01/01: ML server support initial commit.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 456d098647242c9ce9d823f34cb9a9abc6b7ed30
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon Jun 1 17:27:02 2020 +0300
ML server support initial commit.
---
src/main/resources/nlpcraft.conf | 5 +-
.../nlpcraft/common/config/NCConfigurable.scala | 7 ++
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 6 +-
.../cars/CarsModel.scala} | 28 ++---
.../apache/nlpcraft/examples/cars/cars_model.yaml | 33 ++++++
.../scala/org/apache/nlpcraft/model/NCElement.java | 5 +
.../apache/nlpcraft/model/NCModelFileAdapter.java | 5 +
.../nlpcraft/model/impl/json/NCElementJson.java | 17 +++
.../probe/mgrs/conn/NCConnectionManager.scala | 19 ++-
.../org/apache/nlpcraft/server/NCServer.scala | 3 +
.../nlpcraft/server/mdo/NCProbeModelMdo.scala | 6 +-
...ProbeModelMdo.scala => NCProbeModelMlMdo.scala} | 21 +---
.../NCProbeModelMdo.scala => ml/NCMlManager.scala} | 36 +++---
.../server/nlp/core/NCNlpServerManager.scala | 3 +-
.../server/nlp/core/spacy/NCSpaCyNerEnricher.scala | 2 +-
.../nlp/enrichers/NCServerEnrichmentManager.scala | 72 ++++++++----
.../server/nlp/enrichers/ml/NCMlEnricher.scala | 59 ++++++++++
.../nlpcraft/server/probe/NCProbeManager.scala | 130 ++++++++++++++-------
.../nlpcraft/server/query/NCQueryManager.scala | 9 +-
19 files changed, 337 insertions(+), 129 deletions(-)
diff --git a/src/main/resources/nlpcraft.conf b/src/main/resources/nlpcraft.conf
index 70674c8..89c7f50 100644
--- a/src/main/resources/nlpcraft.conf
+++ b/src/main/resources/nlpcraft.conf
@@ -203,7 +203,10 @@ nlpcraft {
]
# If Spacy is enabled as a token provider (value 'spacy') - defines Spacy proxy URL.
- # spacy.proxy.url=http://localhost:5002
+ # spacy.proxy.url="http://localhost:5002"
+
+ # ML server URL. ML enricher is enabled whenever the 'ml' section is present in this configuration.
+ ml.url="http://localhost:5000"
}
# +------------------------------------------+
diff --git a/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala b/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
index 9d523a5..90daed8 100644
--- a/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala
@@ -247,6 +247,13 @@ trait NCConfigurable extends LazyLogging {
if (!hocon.hasPath(name)) None else Some(hocon.getStringList(name).asScala)
/**
+ * Checks whether the given configuration property is defined.
+ *
+ * @param name Full configuration property path (name).
+ */
+ def hasProperty(name: String): Boolean = hocon.hasPath(name)
+
+ /**
*
* @param errMsgs Optional error messages.
*/
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index db05f0d..25e3bdc 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -22,6 +22,7 @@ import java.util.Collections
import org.apache.nlpcraft.common.NCE
import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
+import org.apache.nlpcraft.server.mdo.NCProbeModelMlMdo
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
@@ -447,6 +448,7 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
* @param text Normalized text.
* @param weight Weight.
* @param enabledBuiltInToks Enabled built-in tokens.
+ * @param mlData ML model data holder.
* @param tokens Initial buffer.
*/
class NCNlpSentence(
@@ -454,6 +456,7 @@ class NCNlpSentence(
val text: String,
val weight: Double,
val enabledBuiltInToks: Set[String],
+ val mlData: NCProbeModelMlMdo,
override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
@transient
@@ -462,7 +465,8 @@ class NCNlpSentence(
private def calcHash(): Int = Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
// Deep copy.
- override def clone(): NCNlpSentence = new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, tokens.map(_.clone()))
+ override def clone(): NCNlpSentence =
+ new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, mlData, tokens.map(_.clone()))
/**
* Utility method that gets set of notes for given note type collected from
diff --git a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/src/main/scala/org/apache/nlpcraft/examples/cars/CarsModel.scala
similarity index 58%
copy from src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
copy to src/main/scala/org/apache/nlpcraft/examples/cars/CarsModel.scala
index 1510c4b..8d65be1 100644
--- a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/cars/CarsModel.scala
@@ -15,26 +15,16 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.server.mdo
+package org.apache.nlpcraft.examples.cars
-import org.apache.nlpcraft.server.mdo.impl._
+import org.apache.nlpcraft.model.{NCIntentTerm, _}
-/**
- * Probe model MDO.
- */
-@NCMdoEntity(sql = false)
-case class NCProbeModelMdo(
- @NCMdoField id: String,
- @NCMdoField name: String,
- @NCMdoField version: String,
- @NCMdoField enabledBuiltInTokens: Set[String]
-) extends NCAnnotatedMdo[NCProbeModelMdo] {
- override def hashCode(): Int = s"$id$name".hashCode()
-
- override def equals(obj: Any): Boolean = {
- obj match {
- case x: NCProbeModelMdo ⇒ x.id == id
- case _ ⇒ false
- }
+// TODO: example model is still a stub - it only echoes the token matched by the 'brand' intent term.
+class CarsModel extends NCModelFileAdapter("org/apache/nlpcraft/examples/cars/cars_model.yaml") {
+ @NCIntentRef("brand")
+ def onMatch(
+ @NCIntentTerm("brand") brand: NCToken
+ ): NCResult = {
+ NCResult.text(s"Brand '${brand.origText}'.")
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/examples/cars/cars_model.yaml b/src/main/scala/org/apache/nlpcraft/examples/cars/cars_model.yaml
new file mode 100644
index 0000000..0ce66aa
--- /dev/null
+++ b/src/main/scala/org/apache/nlpcraft/examples/cars/cars_model.yaml
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+id: "nlpcraft.cars.ex"
+name: "Cars Example Model"
+version: "1.0"
+description: "Cars example model."
+examples:
+  - "I like to drive my new BMW."
+enabledBuiltInTokens: [] # Don't use any built-in tokens.
+elements:
+  - id: "cars:brand"
+    description: "Any car"
+    synonyms:
+      - "BMW"
+      - "Mercedes"
+    mlSupport: true
+intents:
+  - "intent=brand term(brand)={id == 'cars:brand'}"
\ No newline at end of file
diff --git a/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index 4c3422d..886b991 100644
--- a/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -327,4 +327,9 @@ public interface NCElement extends NCMetadata, Serializable {
default NCValueLoader getValueLoader() {
return null;
}
+
+ // TODO: document. Flag the probe uses to pick elements reported to the server as ML elements; false by default.
+ default boolean mlSupport() {
+ return false;
+ }
}
diff --git a/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index 85862c7..fc0c209 100644
--- a/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -302,6 +302,11 @@ abstract public class NCModelFileAdapter extends NCModelAdapter {
return syns;
}
+ @Override
+ public boolean mlSupport() {
+ return js.isMlSupport();
+ }
+
private NCValueLoader mkLoader(String clsName) {
NCValueLoader ldr = NCUtils.mkObject(clsName);
diff --git a/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java b/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index c931d01..36ddcac 100644
--- a/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++ b/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -31,6 +31,7 @@ public class NCElementJson {
private Map<String, Object> metadata = new HashMap<>();
private NCValueJson[] values = new NCValueJson[0];
private String valueLoader;
+ private boolean mlSupport;
/**
*
@@ -159,4 +160,20 @@ public class NCElementJson {
public void setValueLoader(String valueLoader) {
this.valueLoader = valueLoader;
}
+
+ /**
+ * Gets the ML support flag of this element.
+ * @return Current value of the {@code mlSupport} field ({@code false} unless it was set).
+ */
+ public boolean isMlSupport() {
+ return mlSupport;
+ }
+
+ /**
+ * Sets the ML support flag of this element.
+ * @param mlSupport New value stored in the {@code mlSupport} field.
+ */
+ public void setMlSupport(boolean mlSupport) {
+ this.mlSupport = mlSupport;
+ }
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index a59eea0..860c572 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -35,6 +35,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.probe.mgrs.cmd.NCCommandManager
import org.apache.nlpcraft.probe.mgrs.model.NCModelManager
+import scala.collection.JavaConverters._
import scala.collection.mutable
/**
@@ -234,7 +235,21 @@ object NCConnectionManager extends NCService {
// util.HashSet created to avoid scala collections serialization error.
// Seems to be a Scala bug.
- (mdl.getId, mdl.getName, mdl.getVersion, new util.HashSet[String](mdl.getEnabledBuiltInTokens))
+ (
+ mdl.getId,
+ mdl.getName,
+ mdl.getVersion,
+ new util.HashSet[String](mdl.getEnabledBuiltInTokens),
+ new util.HashMap[String, util.Set[String]](
+ mdl.getElements.asScala.filter(_.mlSupport()).
+ map(p ⇒
+ p.getId →
+ new util.HashSet[String](
+ p.getSynonyms.asScala.toSet.filter(!_.contains(" ")).asJava)
+ ).toMap.asJava
+ ),
+ new util.HashSet[String](mdl.getExamples)
+ )
})
), cryptoKey)
@@ -247,6 +262,8 @@ object NCConnectionManager extends NCService {
case "S2P_PROBE_NOT_FOUND" ⇒ err("Probe failed to start due to unknown error.")
case "S2P_PROBE_VERSION_MISMATCH" ⇒ err(s"REST server does not support probe version: ${ver.version}")
case "S2P_PROBE_UNSUPPORTED_TOKENS_TYPES" ⇒ err(s"REST server does not support some model enabled tokes types.")
+ case "S2P_PROBE_UNSUPPORTED_ML" ⇒ err(s"REST server does not support ML enabled elements.")
+ case "S2P_PROBE_ML_ERROR" ⇒ err(s"REST server ML elements initialization error.")
case "S2P_PROBE_OK" ⇒ logger.trace("Uplink handshake OK.") // Bingo!
case _ ⇒ err(s"Unknown REST server message: ${resp.getType}")
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/NCServer.scala b/src/main/scala/org/apache/nlpcraft/server/NCServer.scala
index 89f16ae..1067340 100644
--- a/src/main/scala/org/apache/nlpcraft/server/NCServer.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/NCServer.scala
@@ -34,6 +34,7 @@ import org.apache.nlpcraft.server.feedback.NCFeedbackManager
import org.apache.nlpcraft.server.geo.NCGeoManager
import org.apache.nlpcraft.server.ignite.{NCIgniteInstance, NCIgniteRunner}
import org.apache.nlpcraft.server.lifecycle.NCServerLifecycleManager
+import org.apache.nlpcraft.server.ml.NCMlManager
import org.apache.nlpcraft.server.nlp.core.NCNlpServerManager
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager
import org.apache.nlpcraft.server.nlp.preproc.NCPreProcessManager
@@ -127,6 +128,7 @@ object NCServer extends App with NCIgniteInstance with LazyLogging with NCOpenCe
NCQueryManager.start(span)
NCRestManager.start(span)
+ NCMlManager.start(span)
// Lifecycle callback.
NCServerLifecycleManager.afterStart()
@@ -142,6 +144,7 @@ object NCServer extends App with NCIgniteInstance with LazyLogging with NCOpenCe
startScopedSpan("stopManagers") { span ⇒
Seq(
+ NCMlManager,
NCRestManager,
NCQueryManager,
NCFeedbackManager,
diff --git a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index 1510c4b..38a4130 100644
--- a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -27,14 +27,14 @@ case class NCProbeModelMdo(
@NCMdoField id: String,
@NCMdoField name: String,
@NCMdoField version: String,
- @NCMdoField enabledBuiltInTokens: Set[String]
+ @NCMdoField enabledBuiltInTokens: Set[String],
+ @NCMdoField mlData: NCProbeModelMlMdo
) extends NCAnnotatedMdo[NCProbeModelMdo] {
override def hashCode(): Int = s"$id$name".hashCode()
- override def equals(obj: Any): Boolean = {
+ override def equals(obj: Any): Boolean =
obj match {
case x: NCProbeModelMdo ⇒ x.id == id
case _ ⇒ false
}
- }
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMlMdo.scala
similarity index 66%
copy from src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
copy to src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMlMdo.scala
index 1510c4b..ac20d39 100644
--- a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMlMdo.scala
@@ -20,21 +20,10 @@ package org.apache.nlpcraft.server.mdo
import org.apache.nlpcraft.server.mdo.impl._
/**
- * Probe model MDO.
+ * Probe model ML data MDO.
*/
@NCMdoEntity(sql = false)
-case class NCProbeModelMdo(
- @NCMdoField id: String,
- @NCMdoField name: String,
- @NCMdoField version: String,
- @NCMdoField enabledBuiltInTokens: Set[String]
-) extends NCAnnotatedMdo[NCProbeModelMdo] {
- override def hashCode(): Int = s"$id$name".hashCode()
-
- override def equals(obj: Any): Boolean = {
- obj match {
- case x: NCProbeModelMdo ⇒ x.id == id
- case _ ⇒ false
- }
- }
-}
+case class NCProbeModelMlMdo(
+ @NCMdoField mlElements: Map[String, Set[String]],
+ @NCMdoField examples: Set[String]
+)
\ No newline at end of file
diff --git a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/src/main/scala/org/apache/nlpcraft/server/ml/NCMlManager.scala
similarity index 52%
copy from src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
copy to src/main/scala/org/apache/nlpcraft/server/ml/NCMlManager.scala
index 1510c4b..e536f0a 100644
--- a/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/ml/NCMlManager.scala
@@ -15,26 +15,28 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.server.mdo
+package org.apache.nlpcraft.server.ml
-import org.apache.nlpcraft.server.mdo.impl._
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.server.mdo.NCProbeModelMlMdo
+import org.apache.nlpcraft.server.opencensus.NCOpenCensusServerStats
/**
- * Probe model MDO.
+ * TODO:
*/
-@NCMdoEntity(sql = false)
-case class NCProbeModelMdo(
- @NCMdoField id: String,
- @NCMdoField name: String,
- @NCMdoField version: String,
- @NCMdoField enabledBuiltInTokens: Set[String]
-) extends NCAnnotatedMdo[NCProbeModelMdo] {
- override def hashCode(): Int = s"$id$name".hashCode()
-
- override def equals(obj: Any): Boolean = {
- obj match {
- case x: NCProbeModelMdo ⇒ x.id == id
- case _ ⇒ false
- }
+object NCMlManager extends NCService with NCOpenCensusServerStats {
+ override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
+ super.start()
+ }
+
+ override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ ⇒
+ super.stop()
+ }
+
+ @throws[NCE]
+ def prepareMlData(elems: Map[String, Set[String]], examples: Set[String]): NCProbeModelMlMdo = {
+ // TODO:
+ NCProbeModelMlMdo(elems, examples)
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/core/NCNlpServerManager.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/core/NCNlpServerManager.scala
index 589e64c..a8080cf 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/core/NCNlpServerManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/core/NCNlpServerManager.scala
@@ -36,7 +36,6 @@ object NCNlpServerManager extends NCService {
private final val prop = "nlpcraft.server.tokenProviders"
def tokenProviders: Seq[String] = getStringList(prop)
-
def support(name: String): Boolean = tokenProviders.contains(name)
/**
@@ -48,6 +47,8 @@ object NCNlpServerManager extends NCService {
if (unsupported.nonEmpty)
abortWith(s"Configuration '$prop' contains unsupported providers: ${unsupported.mkString(",")}")
}
+
+
}
Config.check()
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/core/spacy/NCSpaCyNerEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/core/spacy/NCSpaCyNerEnricher.scala
index 740e095..29b15fe 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/core/spacy/NCSpaCyNerEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/core/spacy/NCSpaCyNerEnricher.scala
@@ -52,7 +52,7 @@ object NCSpaCyNerEnricher extends NCService with NCNlpNerEnricher with NCIgniteI
private implicit val fmt: RootJsonFormat[SpacySpan] = jsonFormat7(SpacySpan)
private object Config extends NCConfigurable {
- def proxyUrl = getStringOrElse("nlpcraft.server.spacy.proxy.url", "http://localhost:5002")
+ lazy val proxyUrl = getStringOrElse("nlpcraft.server.spacy.proxy.url", "http://localhost:5002")
}
// NOTE: property 'vector' represented as string because Python JSON serialization requirements.
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 0fd6bb1..da65e7c 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -25,11 +25,13 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.server.ignite.NCIgniteHelpers._
import org.apache.nlpcraft.server.ignite.NCIgniteInstance
+import org.apache.nlpcraft.server.mdo.NCProbeModelMlMdo
import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher, NCNlpServerManager}
import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher
import org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher
import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateEnricher
import org.apache.nlpcraft.server.nlp.enrichers.geo.NCGeoEnricher
+import org.apache.nlpcraft.server.nlp.enrichers.ml.NCMlEnricher
import org.apache.nlpcraft.server.nlp.enrichers.numeric.NCNumericEnricher
import org.apache.nlpcraft.server.nlp.enrichers.quote.NCQuoteEnricher
import org.apache.nlpcraft.server.nlp.enrichers.stopword.NCStopWordEnricher
@@ -43,13 +45,15 @@ import scala.util.control.Exception.catching
*/
object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
private object Config extends NCConfigurable {
- def supportNlpCraft: Boolean = getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
+ lazy val supportNlpCraft: Boolean = getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
+ lazy val supportMl: Boolean = hasProperty("nlpcraft.server.ml")
}
private final val CUSTOM_PREFIXES = Set("google:", "opennlp:", "stanford:", "spacy:")
@volatile private var ners: Map[String, NCNlpNerEnricher] = _
@volatile private var supportedProviders: Set[String] = _
+ @volatile private var supportMl: Boolean = true
// NOTE: this cache is independent from datasource.
@volatile private var cache: IgniteCache[String, Holder] = _
@@ -88,6 +92,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
* @param srvReqId Server request ID.
* @param normTxt Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
+ * @param mlData ML model data holder ML enabled elements with synonyms.
* @param parent Optional parent span.
* @return
*/
@@ -95,9 +100,11 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
srvReqId: String,
normTxt: String,
enabledBuiltInToks: Set[String],
- parent: Span = null): NCNlpSentence =
+ mlData: NCProbeModelMlMdo,
+ parent: Span = null
+ ): NCNlpSentence =
startScopedSpan("process", parent, "srvReqId" → srvReqId, "txt" → normTxt) { span ⇒
- val s = new NCNlpSentence(srvReqId, normTxt, 1, enabledBuiltInToks)
+ val s = new NCNlpSentence(srvReqId, normTxt, 1, enabledBuiltInToks, mlData)
// Server-side enrichment pipeline.
// NOTE: order of enrichers is IMPORTANT.
@@ -119,6 +126,9 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
NCCoordinatesEnricher.enrich(s, span)
}
+ if (Config.supportMl && mlData.mlElements.nonEmpty)
+ NCMlEnricher.enrich(s, span)
+
ner(s, enabledBuiltInToks)
prepareAsciiTable(s).info(logger, Some(s"Sentence enriched: $normTxt"))
@@ -132,6 +142,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
* @param srvReqId Server request ID.
* @param txt Input text.
* @param enabledBuiltInToks Set of enabled built-in token IDs.
+ * @param mlData ML model data holder.
* @param parent Optional parent span.
*/
@throws[NCE]
@@ -139,9 +150,11 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
srvReqId: String,
txt: String,
enabledBuiltInToks: Set[String],
- parent: Span = null): NCNlpSentence = {
+ mlData: NCProbeModelMlMdo,
+ parent: Span = null
+ ): NCNlpSentence = {
startScopedSpan("enrichPipeline", parent, "srvReqId" → srvReqId, "txt" → txt) { span ⇒
- val normTxt = NCPreProcessManager.normalize(txt, true, span)
+ val normTxt = NCPreProcessManager.normalize(txt, spellCheck = true, span)
if (normTxt != txt)
logger.info(s"Sentence normalized to: $normTxt")
@@ -157,9 +170,9 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
h.sentence
}
else
- process(srvReqId, normTxt, enabledBuiltInToks, span)
+ process(srvReqId, normTxt, enabledBuiltInToks, mlData, span)
case None ⇒
- process(srvReqId, normTxt, enabledBuiltInToks, span)
+ process(srvReqId, normTxt, enabledBuiltInToks, mlData, span)
}
}
}
@@ -182,7 +195,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
n.keySet
.filter(name ⇒ HEADERS.find(h ⇒ isType(typ, h._1)) match {
case Some((_, (_, names))) ⇒ names.contains(name)
- case None ⇒ false
+ case None ⇒ name == "noteType"
})
.map(name ⇒
Header(
@@ -196,13 +209,12 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
)
}
- val headers = s.flatten.flatMap(mkNoteHeaders).distinct.sortBy(hdr ⇒ {
- val x = HEADERS.
- find(p ⇒ isType(hdr.noteType, p._1)).
- getOrElse(throw new NCE(s"Header not found for: ${hdr.noteType}"))._2
-
- (x._1 * 100) + x._2.indexOf(hdr.noteName)
- })
+ val headers = s.flatten.flatMap(mkNoteHeaders).distinct.sortBy(hdr ⇒
+ HEADERS.find(p ⇒ isType(hdr.noteType, p._1)) match {
+ case Some((_, (idx, names))) ⇒ idx * 100 + names.indexOf(hdr.noteName)
+ case None ⇒ Integer.MAX_VALUE
+ }
+ )
val tbl = NCAsciiTable(headers.map(_.header): _*)
@@ -248,18 +260,25 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
NCStopWordEnricher.start(span)
NCQuoteEnricher.start(span)
+ // Following components can be started independently.
+ val ps = scala.collection.mutable.ArrayBuffer.empty[() ⇒ Any]
+
if (Config.supportNlpCraft) {
- // These component can be started independently.
- U.executeParallel(
- () ⇒ NCDateEnricher.start(span),
- () ⇒ NCNumericEnricher.start(span),
- () ⇒ NCGeoEnricher.start(span),
- () ⇒ NCCoordinatesEnricher.start(span)
- )
+ ps += (() ⇒ NCDateEnricher.start(span))
+ ps += (() ⇒ NCNumericEnricher.start(span))
+ ps += (() ⇒ NCGeoEnricher.start(span))
+ ps += (() ⇒ NCCoordinatesEnricher.start(span))
}
+ if (Config.supportMl)
+ ps += (() ⇒ NCMlEnricher.start(span))
+
+ if (ps.nonEmpty)
+ U.executeParallel(ps :_*)
+
ners = NCNlpServerManager.getNers
supportedProviders = ners.keySet ++ (if (Config.supportNlpCraft) Set("nlpcraft") else Set.empty)
+ supportMl = Config.supportMl
super.start()
}
@@ -288,6 +307,11 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
*
* @return
*/
- def getSupportedProviders: Set[String] =
- supportedProviders
+ def getSupportedProviders: Set[String] = supportedProviders
+
+ /**
+ * Gets ML support flag, captured at start from the presence of 'nlpcraft.server.ml' configuration.
+ * @return Value of the `supportMl` field; note that it is initialized to `true` before start.
+ */
+ def supportMlServer: Boolean = supportMl
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ml/NCMlEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ml/NCMlEnricher.scala
new file mode 100644
index 0000000..da672c3
--- /dev/null
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ml/NCMlEnricher.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.nlp.enrichers.ml
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.NCService
+import org.apache.nlpcraft.common.config.NCConfigurable
+import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote}
+import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
+
+/** Stub ML enricher: no ML server is contacted yet, `enrich` only tags the second sentence token. */
+object NCMlEnricher extends NCServerEnricher {
+ @volatile private var url: String = _
+
+ private object Config extends NCConfigurable {
+ lazy val url: String = getStringOrElse("nlpcraft.server.ml.url", "http://localhost:5000")
+ }
+
+ /** Reads ML server URL from configuration and records it in the span tags. */
+ override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { span ⇒
+ // Drops a single trailing slash; unlike `url.last` this cannot fail on an empty value.
+ url = Config.url.stripSuffix("/")
+
+ addTags(span, "mlUrl" → url)
+
+ // TODO: add ML server health check.
+
+ logger.info(s"ML server connected: $url")
+
+ super.start()
+ }
+
+ /** Stops this enricher. */
+ override def stop(parent: Span): Unit = startScopedSpan("stop", parent) { _ ⇒
+ super.stop()
+ }
+
+ /** Adds a note built from the first ML element ID to the second token, if both exist. */
+ override def enrich(ns: NCNlpSentence, parent: Span): Unit = {
+ // Stub logic; `headOption`/`lift` avoid failures on empty ML data or one-token sentences.
+ for (elem ← ns.mlData.mlElements.keys.headOption; tok ← ns.tokens.lift(1))
+ tok.add(NCNlpSentenceNote(Seq(tok.index), elem))
+ }
+}
diff --git a/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 7f11397..bf24d5a 100644
--- a/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -36,8 +36,11 @@ import org.apache.nlpcraft.common.version.NCVersion
import org.apache.nlpcraft.common.{NCService, _}
import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
import org.apache.nlpcraft.server.company.NCCompanyManager
-import org.apache.nlpcraft.server.mdo.{NCCompanyMdo, NCProbeMdo, NCProbeModelMdo, NCUserMdo}
+import org.apache.nlpcraft.server.mdo.impl.{NCAnnotatedMdo, NCMdoField}
+import org.apache.nlpcraft.server.mdo.{NCCompanyMdo, NCProbeMdo, NCProbeModelMdo, NCProbeModelMlMdo, NCUserMdo}
+import org.apache.nlpcraft.server.ml.NCMlManager
import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager
+import org.apache.nlpcraft.server.opencensus.NCOpenCensusServerStats
import org.apache.nlpcraft.server.proclog.NCProcessLogManager
import org.apache.nlpcraft.server.query.NCQueryManager
import org.apache.nlpcraft.server.sql.NCSql
@@ -56,8 +59,8 @@ object NCProbeManager extends NCService {
private[probe] object Config extends NCConfigurable {
final private val pre = "nlpcraft.server.probe"
- def getDnHostPort = getHostPort(s"$pre.links.downLink")
- def getUpHostPort = getHostPort(s"$pre.links.upLink")
+ def getDnHostPort: (String, Integer) = getHostPort(s"$pre.links.downLink")
+ def getUpHostPort: (String, Integer) = getHostPort(s"$pre.links.upLink")
def poolSize: Int = getInt(s"$pre.poolSize")
def reconnectTimeoutMs: Long = getLong(s"$pre.reconnectTimeoutMs")
@@ -573,60 +576,105 @@ object NCProbeManager extends NCService {
if (probeApiVer != srvApiVer.version)
respond("S2P_PROBE_VERSION_MISMATCH")
else {
+ case class ProbeModel(
+ id: String,
+ name: String,
+ version: String,
+ enabledBuiltInTokens: Set[String],
+ mlElements: Map[String, Set[String]],
+ examples: Set[String]
+ )
+
val models =
- hsMsg.data[List[(String, String, String, java.util.Set[String])]]("PROBE_MODELS").
- map { case (mdlId, mdlName, mdlVer, enabledBuiltInToks) ⇒
- NCProbeModelMdo(
+ hsMsg.data[List[(
+ String,
+ String,
+ String,
+ java.util.Set[String],
+ java.util.Map[String, java.util.Set[String]],
+ java.util.Set[String]
+ )]]("PROBE_MODELS").
+ map { case (
+ mdlId,
+ mdlName,
+ mdlVer,
+ enabledBuiltInToks,
+ mlElements,
+ examples
+ ) ⇒
+ ProbeModel(
id = mdlId,
name = mdlName,
version = mdlVer,
- enabledBuiltInTokens = enabledBuiltInToks.asScala.toSet
+ enabledBuiltInTokens = enabledBuiltInToks.asScala.toSet,
+ mlElements = mlElements.asScala.map(p ⇒ p._1 → p._2.asScala.toSet).toMap,
+ examples = examples.asScala.toSet
)
}.toSet
val probeTokTypes = models.flatMap(_.enabledBuiltInTokens).map(_.takeWhile(_ != ':'))
val tokProviders = NCServerEnrichmentManager.getSupportedProviders
- if (probeTokTypes.exists(typ ⇒ !tokProviders.contains(typ)))
+ if (!NCServerEnrichmentManager.supportMlServer && models.exists(_.mlElements.nonEmpty))
+ respond("S2P_PROBE_UNSUPPORTED_ML")
+ else if (probeTokTypes.exists(typ ⇒ !tokProviders.contains(typ)))
respond("S2P_PROBE_UNSUPPORTED_TOKENS_TYPES")
else {
val probeApiDate = hsMsg.data[java.time.LocalDate]("PROBE_API_DATE")
- val holder = ProbeHolder(
- probeKey,
- NCProbeMdo(
- probeToken = hsMsg.data[String]("PROBE_TOKEN"),
- probeId = hsMsg.data[String]("PROBE_ID"),
- probeGuid = probeGuid,
- probeApiVersion = probeApiVer,
- probeApiDate = java.sql.Date.valueOf(probeApiDate),
- osVersion = hsMsg.data[String]("PROBE_OS_VER"),
- osName = hsMsg.data[String]("PROBE_OS_NAME"),
- osArch = hsMsg.data[String]("PROBE_OS_ARCH"),
- startTstamp = new java.sql.Timestamp(hsMsg.data[Long]("PROBE_START_TSTAMP")),
- tmzId = hsMsg.data[String]("PROBE_TMZ_ID"),
- tmzAbbr = hsMsg.data[String]("PROBE_TMZ_ABBR"),
- tmzName = hsMsg.data[String]("PROBE_TMZ_NAME"),
- userName = hsMsg.data[String]("PROBE_SYS_USERNAME"),
- javaVersion = hsMsg.data[String]("PROBE_JAVA_VER"),
- javaVendor = hsMsg.data[String]("PROBE_JAVA_VENDOR"),
- hostName = hsMsg.data[String]("PROBE_HOST_NAME"),
- hostAddr = hsMsg.data[String]("PROBE_HOST_ADDR"),
- macAddr = hsMsg.dataOpt[String]("PROBE_HW_ADDR").getOrElse(""),
- models = models
- ),
- null, // No downlink socket yet.
- sock,
- null, // No downlink thread yet.
- cryptoKey
- )
+ try { // NCE raised while preparing ML data is reported to the probe as S2P_PROBE_ML_ERROR.
+ val probeModels =
+ models.map(m ⇒
+ NCProbeModelMdo(
+ id = m.id,
+ name = m.name,
+ version = m.version,
+ enabledBuiltInTokens = m.enabledBuiltInTokens,
+ mlData = NCMlManager.prepareMlData(m.mlElements, m.examples)
+ )
+ )
- pending.synchronized {
- pending += probeKey → holder
- }
+ val holder = ProbeHolder(
+ probeKey,
+ NCProbeMdo(
+ probeToken = hsMsg.data[String]("PROBE_TOKEN"),
+ probeId = hsMsg.data[String]("PROBE_ID"),
+ probeGuid = probeGuid,
+ probeApiVersion = probeApiVer,
+ probeApiDate = java.sql.Date.valueOf(probeApiDate),
+ osVersion = hsMsg.data[String]("PROBE_OS_VER"),
+ osName = hsMsg.data[String]("PROBE_OS_NAME"),
+ osArch = hsMsg.data[String]("PROBE_OS_ARCH"),
+ startTstamp = new java.sql.Timestamp(hsMsg.data[Long]("PROBE_START_TSTAMP")),
+ tmzId = hsMsg.data[String]("PROBE_TMZ_ID"),
+ tmzAbbr = hsMsg.data[String]("PROBE_TMZ_ABBR"),
+ tmzName = hsMsg.data[String]("PROBE_TMZ_NAME"),
+ userName = hsMsg.data[String]("PROBE_SYS_USERNAME"),
+ javaVersion = hsMsg.data[String]("PROBE_JAVA_VER"),
+ javaVendor = hsMsg.data[String]("PROBE_JAVA_VENDOR"),
+ hostName = hsMsg.data[String]("PROBE_HOST_NAME"),
+ hostAddr = hsMsg.data[String]("PROBE_HOST_ADDR"),
+ macAddr = hsMsg.dataOpt[String]("PROBE_HW_ADDR").getOrElse(""),
+ models = probeModels
+ ),
+ null, // No downlink socket yet.
+ sock,
+ null, // No downlink thread yet.
+ cryptoKey
+ )
+
+ pending.synchronized {
+ pending += probeKey → holder
+ }
- // Bingo!
- respond("S2P_PROBE_OK")
+ // Bingo!
+ respond("S2P_PROBE_OK")
+ }
+ catch {
+ case e: NCE ⇒
+ logger.error(s"Probe ML data initialization failed: ${e.getMessage}", e) // TODO: send reason to probe.
+ respond("S2P_PROBE_ML_ERROR")
+ }
}
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala b/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
index 8ea4d44..cc6bd9c 100644
--- a/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/query/NCQueryManager.scala
@@ -248,8 +248,9 @@ object NCQueryManager extends NCService with NCIgniteInstance with NCOpenCensusS
Future {
startScopedSpan("future", parent, "srvReqId" → srvReqId) { span ⇒
- val enabledBuiltInToks = NCProbeManager.getModel(mdlId, span).enabledBuiltInTokens
-
+ val mdl = NCProbeManager.getModel(mdlId, span)
+
+ val enabledBuiltInToks = mdl.enabledBuiltInTokens
val toksStr = enabledBuiltInToks.toSeq.
sortBy(t ⇒ (if (t.startsWith("nlpcraft:")) 0
else 1, t)).
@@ -257,7 +258,7 @@ object NCQueryManager extends NCService with NCIgniteInstance with NCOpenCensusS
logger.info(s"New request received " +
s"[txt='$txt0'" +
- s", usr=${usr.firstName} ${usr.lastName} (${usr.email})" +
+ s", usr=${usr.firstName.getOrElse("")} ${usr.lastName.getOrElse("")} (${usr.email.getOrElse("")})" +
s", mdlId=$mdlId" +
s", enabledBuiltInTokens=$toksStr" +
s"]")
@@ -269,7 +270,7 @@ object NCQueryManager extends NCService with NCIgniteInstance with NCOpenCensusS
company,
mdlId,
txt0,
- NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0, enabledBuiltInToks),
+ NCServerEnrichmentManager.enrichPipeline(srvReqId, txt0, enabledBuiltInToks, mdl.mlData, span),
usrAgent,
rmtAddr,
data,