You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/09/23 05:54:47 UTC
[incubator-nlpcraft] 02/02: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 267e82f5f9b888c5080e272fdd23d8db4e600aeb
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Sep 23 08:54:32 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 12 ++++
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 9 +--
.../nlpcraft/common/nlp/NCNlpSentenceToken.scala | 12 +---
.../org/apache/nlpcraft/probe/NCProbeBoot.scala | 3 +-
.../nlpcraft/probe/mgrs/NCProbeVariants.scala | 4 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 9 +--
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 9 +--
.../probe/mgrs/sentence/NCSentenceManager.scala | 1 -
.../{sentence => synonyms}/NCSynonymsManager.scala | 69 ++++++++++++----------
.../nlp/enrichers/NCServerEnrichmentManager.scala | 4 +-
10 files changed, 69 insertions(+), 63 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 0f0b462..40f5da6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -74,6 +74,18 @@ class NCNlpSentence(
firstProbePhase = firstProbePhase
)
+ def copy(srvReqId: Option[String]): NCNlpSentence =
+ new NCNlpSentence(
+ srvReqId = srvReqId.getOrElse(this.srvReqId),
+ text = this.text,
+ enabledBuiltInToks = this.enabledBuiltInToks,
+ tokens = this.tokens,
+ deletedNotes = this.deletedNotes,
+ initNlpNotes = this.initNlpNotes,
+ nlpTokens = this.nlpTokens,
+ firstProbePhase = this.firstProbePhase
+ )
+
/**
* Utility method that gets set of notes for given note type collected from
* tokens in this sentence. Notes are sorted in the same order they appear
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 63ae6ca..c457aa7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -77,14 +77,11 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
indexes,
Some(wordIndexes),
noteType,
- values.filter(p => !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
+ dataWithoutIndexes.toSeq ++ params:_*
)
- override def clone(): NCNlpSentenceNote = {
- val m = mutable.Map.empty[String, JSerializable] ++ values
-
- new NCNlpSentenceNote(m.toMap)
- }
+ override def clone(): NCNlpSentenceNote =
+ new NCNlpSentenceNote((mutable.HashMap.empty[String, JSerializable] ++ values).toMap)
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index 4b94b98..fa9cbe6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -67,17 +67,7 @@ case class NCNlpSentenceToken(
* Shallow copy.
*/
def clone(index: Int): NCNlpSentenceToken =
- NCNlpSentenceToken(
- index,
- {
- val m = mutable.HashSet.empty[NCNlpSentenceNote]
-
- notes.foreach(n => m += n.clone())
-
- m
- },
- stopsReasons.clone()
- )
+ NCNlpSentenceToken(index, mutable.HashSet.empty[NCNlpSentenceNote] ++ notes.clone(), stopsReasons.clone())
/**
* Clones note.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index 4df9f53..561860f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,7 +49,8 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import java.io._
import java.util.concurrent.CompletableFuture
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index e876065..0596783 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,7 +22,7 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence => NlpSentence, NCNlpSenten
import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
import org.apache.nlpcraft.model.NCVariant
import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger, NCVariantImpl}
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import java.io.{Serializable => JSerializable}
import java.util
@@ -268,7 +268,7 @@ object NCProbeVariants {
for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
process(tok, tokNlp)
- ok = ok && NCSynonymsManager.isStillValid(srvReqId, toks.toSeq)
+ ok = ok && (!lastPhase || NCSynonymsManager.isStillValid(srvReqId, toks.toSeq))
if (ok) Some(new NCVariantImpl(toks.asJava)) else None
})
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 64049ac..20dc64d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,7 +43,8 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl._
import org.apache.nlpcraft.probe.mgrs.nlp.validate._
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
import java.io.Serializable
@@ -294,6 +295,9 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
): Unit = {
require(errMsg.isDefined || (resType.isDefined && resBody.isDefined))
+ NCSentenceManager.clearRequestData(srvReqId)
+ NCSynonymsManager.clearRequestData(srvReqId)
+
val msg = NCProbeMessage(msgName)
msg.addData("srvReqId", srvReqId)
@@ -554,9 +558,6 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq, lastPhase = true)
- NCSentenceManager.clearRequestData(srvReqId)
- NCSynonymsManager.clearRequestData(srvReqId)
-
// Sentence variants can be filtered by model.
val fltSenVars: Seq[(NCVariant, Int)] =
senVars.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 03c5b5d..c5ca532 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -27,7 +27,8 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
-import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.synonyms.NCSynonymsManager
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
@@ -535,11 +536,11 @@ object NCModelEnricher extends NCProbeEnricher {
p.token
else {
// TODO: everywhere
- val clone = p.word.clone()
+ val notes = mutable.HashSet.empty[NlpNote]
- clone.filter(!_.isNlp).foreach(clone.remove)
+ notes += p.word.getNlpNote
- NCTokenImpl(mdl, ns.srvReqId, clone)
+ NCTokenImpl(mdl, ns.srvReqId, NlpToken(p.word.index, notes, p.word.stopsReasons))
}))
def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 2e280ac..34c3f87 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -24,7 +24,6 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
import org.apache.nlpcraft.common.{NCE, NCService, U, _}
import org.apache.nlpcraft.model.NCModel
import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager.{idlCache, reqCache}
import java.io.{Serializable => JSerializable}
import java.util
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
similarity index 85%
rename from nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
rename to nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
index cf5eb5d..e9bf751 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/synonyms/NCSynonymsManager.scala
@@ -15,33 +15,28 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.probe.mgrs.sentence
+package org.apache.nlpcraft.probe.mgrs.synonyms
import io.opencensus.trace.Span
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.common.{NCService, U}
-import org.apache.nlpcraft.model._
import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, NCSynonymChunkKind, REGEX, TEXT}
import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonymChunk, NCProbeSynonym => Synonym}
import scala.collection.mutable
+import scala.jdk.CollectionConverters.ListHasAsScala
/**
*
*/
object NCSynonymsManager extends NCService {
- case class Key(token: NCToken) {
- // NCToken hashCode and equals based on indexes. // TODO: check it!
- override def hashCode(): Int = U.mkJavaHash(token.getId, token)
- override def equals(obj: Any): Boolean = obj match {
- case key: Key => key.token.getId == token.getId && key.token == token
- }
- }
case class Value(request: NCRequest, variants: Seq[Seq[NCToken]], predicate: NCIdlFunction)
- private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[Key, Value]]
+ // TODO: NCToken is not a suitable key
+ private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[NCToken, Value]]
override def start(parent: Span): NCService = {
ackStarting()
@@ -149,8 +144,7 @@ object NCSynonymsManager extends NCService {
* @param variantsToks
*/
private def save(req: NCRequest, tok: NCToken, pred: NCIdlFunction, variantsToks: Seq[Seq[NCToken]]): Unit =
- idlCache.getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty) +=
- Key(tok) -> Value(req, variantsToks, pred)
+ idlCache.getOrElseUpdate(req.getServerRequestId, mutable.HashMap.empty) += tok -> Value(req, variantsToks, pred)
/**
*
@@ -163,7 +157,8 @@ object NCSynonymsManager extends NCService {
tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
): Boolean = {
def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
- if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
+ if (tow.isLeft) fromToken(tow.swap.toOption.get)
+ else fromWord(tow.toOption.get)
chunk.kind match {
case TEXT => chunk.wordStem == get0(_.stem, _.stem)
@@ -217,7 +212,7 @@ object NCSynonymsManager extends NCService {
* @param req
* @param variantsToks
*/
- def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean= {
+ def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean = {
require(tows != null)
if (tows.length == s.length && tows.count(_.isLeft) >= s.idlChunks)
@@ -256,7 +251,8 @@ object NCSynonymsManager extends NCService {
s,
tows,
(t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
- (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
+ (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex
+ else t.toOption.get.startCharIndex,
shouldBeNeighbors = !s.sparse
)
}
@@ -264,31 +260,40 @@ object NCSynonymsManager extends NCService {
/**
*
* @param srvReqId
- * @param toks
+ * @param sen
* @return
*/
- def isStillValid(srvReqId: String, toks: Seq[NCToken]): Boolean =
- toks.forall(tok =>
- idlCache.get(srvReqId) match {
- case Some(m) =>
- m.get(Key(tok)) match {
- case Some(v) =>
+ def isStillValid(srvReqId: String, sen: Seq[NCToken]): Boolean =
+ idlCache.get(srvReqId) match {
+ case Some(m) =>
+ lazy val allCheckedSenToks = {
+ val set = mutable.HashSet.empty[NCToken]
+ def add(t: NCToken): Unit = {
+ set += t
- val x =
- v.predicate.apply(
- tok, NCIdlContext(req = v.request, toks = toks)
- ).value.asInstanceOf[Boolean]
+ t.getPartTokens.asScala.foreach(add)
+ }
+ sen.foreach(add)
- if (!x)
- println("x="+x + ", t=" + tok + ", toks=" + toks)
+ set
+ }
+
+ sen.forall(tok =>
+ m.get(tok) match {
+ case Some(v) =>
+ v.variants.exists(winHistVariant =>
+ v.predicate.apply(
+ tok, NCIdlContext(toks = winHistVariant, req = v.request)
+ ).value.asInstanceOf[Boolean] &&
+ winHistVariant.forall(allCheckedSenToks.contains)
+ )
- x
case None => true
- }
- case None => true
- })
+ })
+ case None => true
+ }
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 03b749f..2f457cb 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -156,7 +156,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
- h.sentence
+ h.sentence.copy(Some(U.genGuid()))
}
else
process(srvReqId, normTxt, enabledBuiltInToks, span)
@@ -224,7 +224,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
.getNotes(hdr.noteType)
.filter(_.contains(hdr.noteName))
.map(note => {
- val s = note(hdr.noteName).toString()
+ val s = note(hdr.noteName).toString
if (isStopWord) s"${r(s)}" else s
})
.toSeq