You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/26 08:06:11 UTC
[incubator-nlpcraft] branch NLPCRAFT-253 updated: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-253 by this push:
new 342b56f WIP.
342b56f is described below
commit 342b56f74a3db45ae2729d8c5f6ba9cd044bcaf8
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Feb 26 11:05:52 2021 +0300
WIP.
---
.../apache/nlpcraft/probe/mgrs/NCProbeModel.scala | 2 +-
.../probe/mgrs/NCProbeSynonymsWrapper.scala | 25 +++++----
.../probe/mgrs/deploy/NCDeployManager.scala | 9 +--
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 64 ++++++++++++----------
4 files changed, 57 insertions(+), 43 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 155ffb5..acc2021 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -40,7 +40,7 @@ case class NCProbeModel(
solver: NCIntentSolver,
intents: Seq[NCDslIntent],
synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
- synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
+ synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
addStopWordsStems: Set[String],
exclStopWordsStems: Set[String],
suspWordsStems: Set[String],
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
index bb2362d..0dbd815 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
@@ -19,25 +19,30 @@ package org.apache.nlpcraft.probe.mgrs
/**
* Synonyms sequence holder.
- *
*/
case class NCProbeSynonymsWrapper(
- textDirectSynonyms: Map[String, NCProbeSynonym],
- textNotDirectSynonyms: Map[String, NCProbeSynonym],
- otherDirectSynonyms: Seq[NCProbeSynonym],
- otherNotDirectSynonyms: Seq[NCProbeSynonym],
+ txtDirectSynonyms: Map[String, NCProbeSynonym],
+ txtNotDirectSynonyms: Map[String, NCProbeSynonym],
+ notTxtDirectSynonyms: Seq[NCProbeSynonym],
+ notTxtNotDirectSynonyms: Seq[NCProbeSynonym],
count: Int
)
object NCProbeSynonymsWrapper {
def apply(syns: Seq[NCProbeSynonym]): NCProbeSynonymsWrapper = {
- val txtSyns = syns.filter(_.isTextOnly)
+ // When converted to a map, more important values come last, so they override earlier (less important) entries that share the same key.
+ val txtSyns = syns.filter(_.isTextOnly).sorted
+
+ // Must be ordered by importance, most important first (hence the reverse).
val other = syns.filter(!_.isTextOnly).sorted.reverse
+ def filter(seq: Seq[NCProbeSynonym], direct: Boolean): Seq[NCProbeSynonym] = seq.filter(_.isDirect == direct)
+ def toMap(seq: Seq[NCProbeSynonym]): Map[String, NCProbeSynonym] = seq.map(s ⇒ s.stems → s).toMap
+
NCProbeSynonymsWrapper(
- textDirectSynonyms = txtSyns.filter(_.isDirect).map(s ⇒ s.stems → s).toMap,
- textNotDirectSynonyms = txtSyns.filter(!_.isDirect).map(s ⇒ s.stems → s).toMap,
- otherDirectSynonyms = other.filter(_.isDirect),
- otherNotDirectSynonyms = other.filter(!_.isDirect),
+ txtDirectSynonyms = toMap(filter(txtSyns, direct = true)),
+ txtNotDirectSynonyms = toMap(filter(txtSyns, direct = false)),
+ notTxtDirectSynonyms = filter(other, direct = true),
+ notTxtNotDirectSynonyms = filter(other, direct = false),
count = syns.size
)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 136f221..a68e305 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -502,8 +502,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
model = mdl,
solver = solver,
intents = intents.keySet.toSeq,
- synonyms = mkFastAccessMap(filter(syns, dsl = false)),
- synonymsDsl = mkFastAccessMap(filter(syns, dsl = true)),
+ synonyms = mkFastAccessMap(filter(syns, dsl = false), NCProbeSynonymsWrapper(_)),
+ synonymsDsl = mkFastAccessMap(filter(syns, dsl = true), seq ⇒ seq),
addStopWordsStems = addStopWords.toSet,
exclStopWordsStems = exclStopWords.toSet,
suspWordsStems = suspWords.toSet,
@@ -550,7 +550,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
* @param set
* @return
*/
- private def mkFastAccessMap(set: Set[SynonymHolder]): Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]] =
+ private def mkFastAccessMap[T](set: Set[SynonymHolder], f: Seq[NCProbeSynonym] ⇒ T):
+ Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , T]] =
set
.groupBy(_.elmId)
.map {
@@ -561,7 +562,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
.groupBy(_.size)
.map {
// Sort synonyms from most important to least important.
- case (k, v) ⇒ (k, NCProbeSynonymsWrapper(v.toSeq))
+ case (k, v) ⇒ (k, f(v.toSeq))
}
)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index b7e7a5d..61cd50f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -336,7 +336,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param len
* @return
*/
- def fastAccess(
+ def fastAccessNotDsl(
fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]],
elmId: String,
len: Int
@@ -346,6 +346,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
case None ⇒ None
}
+ def fastAccessDls(
+ fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]],
+ elmId: String,
+ len: Int): Seq[NCProbeSynonym] =
+ fastMap.get(elmId).flatMap(_.get(len)) match {
+ case Some(seq) ⇒ seq
+ case None ⇒ Seq.empty[NCProbeSynonym]
+ }
+
/**
*
* @param toks
@@ -389,26 +398,34 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
// Optimization - plain synonyms can be used only on first iteration
if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
- fastAccess(mdl.synonyms, elm.getId, toks.length) match {
+ fastAccessNotDsl(mdl.synonyms, elm.getId, toks.length) match {
case Some(h) ⇒
val stems = toks.map(_.stem).mkString(" ")
- h.textDirectSynonyms.get(stems) match {
- case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
- case None ⇒
- for (syn ← h.otherDirectSynonyms if !found)
- if (syn.isMatch(toks))
- addMatch(elm, toks, syn, Seq.empty)
-
- if (!found) {
- h.textNotDirectSynonyms.get(stems) match {
- case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
- case None ⇒
- for (syn ← h.otherNotDirectSynonyms if !found)
- if (syn.isMatch(toks))
- addMatch(elm, toks, syn, Seq.empty)
- }
- }
+ def tryMap(synsMap: Map[String, NCProbeSynonym], notFound: () ⇒ Unit): Unit =
+ synsMap.get(stems) match {
+ case Some(syn) ⇒
+ addMatch(elm, toks, syn, Seq.empty)
+
+ if (!found)
+ notFound()
+ case None ⇒ notFound()
+ }
+
+ def scan(synsSeq: Seq[NCProbeSynonym]): Unit =
+ for (syn ← synsSeq if !found)
+ if (syn.isMatch(toks))
+ addMatch(elm, toks, syn, Seq.empty)
+
+ tryMap(
+ h.txtDirectSynonyms,
+ () ⇒ {
+ scan(h.notTxtDirectSynonyms)
+
+ if (!found)
+ tryMap(h.txtNotDirectSynonyms, () ⇒ scan(h.notTxtNotDirectSynonyms))
+ }
+ )
case None ⇒ // No-op.
}
@@ -423,16 +440,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
if (seq == null)
seq = convert(ns, collapsedSens, toks)
- for (comb ← seq) {
- fastAccess(mdl.synonymsDsl, elm.getId, comb.length) match {
- case Some(h) ⇒
- require()
- case None ⇒
- }
- }
- }
-
- for (comb ← seq; syn ← fastAccess(mdl.synonymsDsl, elm.getId, comb.length) if !found)
+ for (comb ← seq; syn ← fastAccessDls(mdl.synonymsDsl, elm.getId, comb.length) if !found)
if (syn.isMatch(comb.map(_.data)))
addMatch(elm, toks, syn, comb.filter(_.isToken).map(_.token))
}