You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/04/03 07:59:22 UTC
[incubator-nlpcraft] branch NLPCRAFT-287 updated (bee2c6f -> d23483f)
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a change to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.
from bee2c6f WIP.
new 7eebb40 WIP.
add eb9ffef Improved UNICODE in Antlr4.
add 5fcefe9 Configuration md5 fixes.
new d23483f Merge branch 'master' into NLPCRAFT-287
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
external/md5.txt | 2 +-
.../nlpcraft/model/intent/compiler/antlr4/NCIdl.g4 | 20 ++-----------
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 34 ++++++++++++++--------
.../probe/mgrs/deploy/NCDeployManager.scala | 16 ++++++----
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 27 ++++++++---------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 16 +++++-----
.../nlpcraft/model/intent/idl/compiler/test_ok.idl | 26 ++++++++++++++++-
7 files changed, 85 insertions(+), 56 deletions(-)
[incubator-nlpcraft] 02/02: Merge branch 'master' into NLPCRAFT-287
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit d23483fc453e85716df6f44df533c6d8ed10c430
Merge: 7eebb40 5fcefe9
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sat Apr 3 10:59:05 2021 +0300
Merge branch 'master' into NLPCRAFT-287
external/md5.txt | 2 +-
.../nlpcraft/model/intent/compiler/antlr4/NCIdl.g4 | 20 +++--------------
.../nlpcraft/model/intent/idl/compiler/test_ok.idl | 26 +++++++++++++++++++++-
3 files changed, 29 insertions(+), 19 deletions(-)
[incubator-nlpcraft] 01/02: WIP.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 7eebb407a412ab152f07935782c56d003e40f214
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sat Apr 3 10:27:38 2021 +0300
WIP.
---
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 34 ++++++++++++++--------
.../probe/mgrs/deploy/NCDeployManager.scala | 16 ++++++----
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 27 ++++++++---------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 16 +++++-----
4 files changed, 56 insertions(+), 37 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 92e9ece..d09418a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -33,12 +33,16 @@ import scala.collection.mutable
* In this case chunks contain value name.
* @param isDirect Direct or permuted synonym flag.
* @param value Optional value name if this is a value synonym.
+ * @param sparse Flag.
+ * @param perm Flag.
*/
class NCProbeSynonym(
val isElementId: Boolean,
val isValueName: Boolean,
val isDirect: Boolean,
- val value: String = null
+ val value: String = null,
+ val sparse: Boolean,
+ val perm: Boolean
) extends mutable.ArrayBuffer[NCProbeSynonymChunk] with Ordered[NCProbeSynonym] {
require((isElementId && !isValueName && value == null) || !isElementId)
require((isValueName && value != null) || !isValueName)
@@ -144,7 +148,7 @@ class NCProbeSynonym(
var ok = true
val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
- if (isDirect) {
+ if (!perm) {
var lastIdx = 0
val tokIdxs = sen.zipWithIndex.toMap
@@ -175,13 +179,10 @@ class NCProbeSynonym(
*/
private def isMatch(tow: NCDslContent, chunk: NCProbeSynonymChunk, req: NCRequest): Boolean = {
def get0[T](fromToken: NCToken ⇒ T, fromWord: NCNlpSentenceToken ⇒ T): T =
- if (tow.isLeft) fromToken(tow.left.get)
- else fromWord(tow.right.get)
+ if (tow.isLeft) fromToken(tow.left.get) else fromWord(tow.right.get)
chunk.kind match {
- case TEXT ⇒
- chunk.wordStem == get0(_.stem, _.stem)
-
+ case TEXT ⇒ chunk.wordStem == get0(_.stem, _.stem)
case REGEX ⇒
val r = chunk.regex
@@ -223,7 +224,7 @@ class NCProbeSynonym(
var ok = true
val buf = mutable.ArrayBuffer.empty[NCDslContent]
- if (isDirect) {
+ if (!perm) {
var lastIdx = 0
val tokIdxs = sen.zipWithIndex.toMap
@@ -243,7 +244,7 @@ class NCProbeSynonym(
convertResult(ok, buf)
}
- collectMatches(sen ,trySparseMatch0)
+ collectMatches(sen, trySparseMatch0)
}
override def toString(): String = mkString(" ")
@@ -339,10 +340,19 @@ object NCProbeSynonym {
* @param isDirect
* @param value
* @param chunks
- * @return
+ * @param sparse
+ * @param perm
*/
- def apply(isElementId: Boolean, isValueName: Boolean, isDirect: Boolean, value: String, chunks: Seq[NCProbeSynonymChunk]): NCProbeSynonym = {
- var syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value)
+ def apply(
+ isElementId: Boolean,
+ isValueName: Boolean,
+ isDirect: Boolean,
+ value: String,
+ chunks: Seq[NCProbeSynonymChunk],
+ sparse: Boolean,
+ perm: Boolean
+ ): NCProbeSynonym = {
+ var syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value, sparse, perm)
syn ++= chunks
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 2889241..a9f4d4a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -218,16 +218,20 @@ object NCDeployManager extends NCService with DecorateAsScala {
s"]"
)
+ val sparse = elm.isSparse.orElse(mdl.isSparse)
+ val perm = elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms)
+
def addSynonym(
isElementId: Boolean,
isValueName: Boolean,
value: String,
- chunks: Seq[NCProbeSynonymChunk]): Unit = {
+ chunks: Seq[NCProbeSynonymChunk]
+ ): Unit = {
def add(chunks: Seq[NCProbeSynonymChunk], isDirect: Boolean): Unit = {
val holder = SynonymHolder(
elmId = elmId,
- sparse = elm.isSparse.orElse(mdl.isSparse),
- syn = NCProbeSynonym(isElementId, isValueName, isDirect, value, chunks)
+ sparse = sparse,
+ syn = NCProbeSynonym(isElementId, isValueName, isDirect, value, chunks, sparse, perm)
)
if (syns.add(holder)) {
@@ -261,8 +265,10 @@ object NCDeployManager extends NCService with DecorateAsScala {
}
if (
- elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms) &&
- !isElementId && chunks.forall(_.wordStem != null)
+ perm &&
+ !sparse &&
+ !isElementId &&
+ chunks.forall(_.wordStem != null)
)
simplePermute(chunks).map(p ⇒ p.map(_.wordStem) → p).toMap.values.foreach(p ⇒ add(p, p == chunks))
else
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index ddcc286..cdfdf89 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -58,23 +58,24 @@ object NCModelManager extends NCService with DecorateAsScala {
data.values.foreach(w ⇒ {
val mdl = w.model
- val synCnt =
- w.sparseSynonyms.map(_._2.size).sum +
- w.sparseSynonymsDsl.map(_._2.size).sum +
- w.nonSparseSynonyms.flatMap(_._2.map(_._2.count)).sum +
- w.nonSparseSynonymsDsl.map(_._2.size).sum
-
+ val synCnt = w.nonSparseSynonyms.flatMap(_._2.map(_._2.count)).sum
+ val synDslCnt = w.nonSparseSynonymsDsl.map(_._2.size).sum
+ val synSparseCnt = w.sparseSynonyms.map(_._2.size).sum
+ val synSparseDslCnt = w.sparseSynonymsDsl.map(_._2.size).sum
val elmCnt = w.elements.keySet.size
val intentCnt = w.intents.size
tbl += Seq(
- s"Name: ${bo(c(mdl.getName))}",
- s"ID: ${bo(mdl.getId)}",
- s"Version: ${mdl.getVersion}",
- s"Origin: ${mdl.getOrigin}",
- s"Elements: $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}" else ""),
- s"Synonyms: $synCnt" + (if (synCnt == 0) s" ${r("(!)")}" else ""),
- s"Intents: $intentCnt" + (if (intentCnt == 0) s" ${r("(!)")}" else "")
+ s"Name: ${bo(c(mdl.getName))}",
+ s"ID: ${bo(mdl.getId)}",
+ s"Version: ${mdl.getVersion}",
+ s"Origin: ${mdl.getOrigin}",
+ s"Elements: $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}" else ""),
+ s"Synonyms: $synCnt" + (if (synCnt == 0) s" ${r("(!)")}" else ""),
+ s"Synonyms(DSL): $synDslCnt" + (if (synDslCnt == 0) s" ${r("(!)")}" else ""),
+ s"Synonyms(Sparse): $synSparseCnt" + (if (synSparseCnt == 0) s" ${r("(!)")}" else ""),
+ s"Synonyms(Sparse, DSL): $synSparseDslCnt" + (if (synSparseDslCnt == 0) s" ${r("(!)")}" else ""),
+ s"Intents: $intentCnt" + (if (intentCnt == 0) s" ${r("(!)")}" else "")
)
})
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index ed80630..22a6a5b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
@@ -289,7 +289,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
if (!matches.exists(m ⇒ m.element.getId == elm.getId && tokensSet.subsetOf(m.tokensSet))) {
matches += ElementMatch(elm, toks, syn, parts)
-
true
}
else
@@ -357,8 +356,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
) {
_ ⇒
// 1. Simple, sparse.
- for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
- syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
+ if (!ns.exists(_.isUser))
+ for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
+ syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
// 2. DSL, sparse.
for ((elemId, syns) ← mdl.sparseSynonymsDsl; syn ← syns) {
@@ -423,9 +423,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
toks: Seq[NlpToken],
syn: NCProbeSynonym,
parts: Seq[TokenData]
- ): Unit =
- if (addMatch(elm, toks, syn, parts))
- found = true
+ ): Unit = {
+ addMatch(elm, toks, syn, parts)
+
+ found = true
+ }
// 3. Simple, not sparse.
// Optimization - plain synonyms can be used only on first iteration