You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/26 08:06:11 UTC

[incubator-nlpcraft] branch NLPCRAFT-253 updated: WIP.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-253 by this push:
     new 342b56f  WIP.
342b56f is described below

commit 342b56f74a3db45ae2729d8c5f6ba9cd044bcaf8
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Feb 26 11:05:52 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |  2 +-
 .../probe/mgrs/NCProbeSynonymsWrapper.scala        | 25 +++++----
 .../probe/mgrs/deploy/NCDeployManager.scala        |  9 +--
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 64 ++++++++++++----------
 4 files changed, 57 insertions(+), 43 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 155ffb5..acc2021 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -40,7 +40,7 @@ case class NCProbeModel(
     solver: NCIntentSolver,
     intents: Seq[NCDslIntent],
     synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
-    synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
+    synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
     addStopWordsStems: Set[String],
     exclStopWordsStems: Set[String],
     suspWordsStems: Set[String],
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
index bb2362d..0dbd815 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
@@ -19,25 +19,30 @@ package org.apache.nlpcraft.probe.mgrs
 
 /**
   * Synonyms sequence holder.
-  *
   */
 case class NCProbeSynonymsWrapper(
-    textDirectSynonyms: Map[String, NCProbeSynonym],
-    textNotDirectSynonyms: Map[String, NCProbeSynonym],
-    otherDirectSynonyms: Seq[NCProbeSynonym],
-    otherNotDirectSynonyms: Seq[NCProbeSynonym],
+    txtDirectSynonyms: Map[String, NCProbeSynonym],
+    txtNotDirectSynonyms: Map[String, NCProbeSynonym],
+    notTxtDirectSynonyms: Seq[NCProbeSynonym],
+    notTxtNotDirectSynonyms: Seq[NCProbeSynonym],
     count: Int
 )
 object NCProbeSynonymsWrapper {
     def apply(syns: Seq[NCProbeSynonym]): NCProbeSynonymsWrapper = {
-        val txtSyns = syns.filter(_.isTextOnly)
+        // When converted to a map, more important values come last, so they override earlier (less important) entries with the same key.
+        val txtSyns = syns.filter(_.isTextOnly).sorted
+
+        // Must be ordered by importance (most important first).
         val other = syns.filter(!_.isTextOnly).sorted.reverse
 
+        def filter(seq: Seq[NCProbeSynonym], direct: Boolean): Seq[NCProbeSynonym] = seq.filter(_.isDirect == direct)
+        def toMap(seq: Seq[NCProbeSynonym]): Map[String, NCProbeSynonym] = seq.map(s ⇒ s.stems → s).toMap
+
         NCProbeSynonymsWrapper(
-            textDirectSynonyms = txtSyns.filter(_.isDirect).map(s ⇒ s.stems → s).toMap,
-            textNotDirectSynonyms = txtSyns.filter(!_.isDirect).map(s ⇒ s.stems → s).toMap,
-            otherDirectSynonyms = other.filter(_.isDirect),
-            otherNotDirectSynonyms = other.filter(!_.isDirect),
+            txtDirectSynonyms = toMap(filter(txtSyns, direct = true)),
+            txtNotDirectSynonyms = toMap(filter(txtSyns, direct = false)),
+            notTxtDirectSynonyms = filter(other, direct = true),
+            notTxtNotDirectSynonyms = filter(other, direct = false),
             count = syns.size
         )
     }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 136f221..a68e305 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -502,8 +502,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
             model = mdl,
             solver = solver,
             intents = intents.keySet.toSeq,
-                synonyms = mkFastAccessMap(filter(syns, dsl = false)),
-            synonymsDsl = mkFastAccessMap(filter(syns, dsl = true)),
+            synonyms = mkFastAccessMap(filter(syns, dsl = false), NCProbeSynonymsWrapper(_)),
+            synonymsDsl = mkFastAccessMap(filter(syns, dsl = true), seq ⇒ seq),
             addStopWordsStems = addStopWords.toSet,
             exclStopWordsStems = exclStopWords.toSet,
             suspWordsStems = suspWords.toSet,
@@ -550,7 +550,8 @@ object NCDeployManager extends NCService with DecorateAsScala {
       * @param set
       * @return
       */
-    private def mkFastAccessMap(set: Set[SynonymHolder]): Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]] =
+    private def mkFastAccessMap[T](set: Set[SynonymHolder], f: Seq[NCProbeSynonym] ⇒ T):
+        Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , T]] =
         set
             .groupBy(_.elmId)
             .map {
@@ -561,7 +562,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
                         .groupBy(_.size)
                         .map {
                             // Sort synonyms from most important to least important.
-                            case (k, v) ⇒ (k, NCProbeSynonymsWrapper(v.toSeq))
+                            case (k, v) ⇒ (k, f(v.toSeq))
                         }
                 )
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index b7e7a5d..61cd50f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -336,7 +336,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
               * @param len
               * @return
               */
-            def fastAccess(
+            def fastAccessNotDsl(
                 fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]],
                 elmId: String,
                 len: Int
@@ -346,6 +346,15 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     case None ⇒ None
                 }
 
+            def fastAccessDls(
+                fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]],
+                elmId: String,
+                len: Int): Seq[NCProbeSynonym] =
+                fastMap.get(elmId).flatMap(_.get(len)) match {
+                    case Some(seq) ⇒ seq
+                    case None ⇒ Seq.empty[NCProbeSynonym]
+                }
+
             /**
               *
               * @param toks
@@ -389,26 +398,34 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                             // Optimization - plain synonyms can be used only on first iteration
                             if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
-                                fastAccess(mdl.synonyms, elm.getId, toks.length) match {
+                                fastAccessNotDsl(mdl.synonyms, elm.getId, toks.length) match {
                                     case Some(h) ⇒
                                         val stems = toks.map(_.stem).mkString(" ")
 
-                                        h.textDirectSynonyms.get(stems) match {
-                                            case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
-                                            case None ⇒
-                                                for (syn ← h.otherDirectSynonyms if !found)
-                                                    if (syn.isMatch(toks))
-                                                        addMatch(elm, toks, syn, Seq.empty)
-
-                                                if (!found) {
-                                                    h.textNotDirectSynonyms.get(stems) match {
-                                                        case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
-                                                        case None ⇒
-                                                            for (syn ← h.otherNotDirectSynonyms if !found)
-                                                                if (syn.isMatch(toks))
-                                                                    addMatch(elm, toks, syn, Seq.empty)
-                                                    }
-                                        }
+                                        def tryMap(synsMap: Map[String, NCProbeSynonym], notFound: () ⇒ Unit): Unit =
+                                            synsMap.get(stems) match {
+                                                case Some(syn) ⇒
+                                                    addMatch(elm, toks, syn, Seq.empty)
+
+                                                    if (!found)
+                                                        notFound()
+                                                case None ⇒ notFound()
+                                            }
+
+                                        def scan(synsSeq: Seq[NCProbeSynonym]): Unit =
+                                            for (syn ← synsSeq if !found)
+                                                if (syn.isMatch(toks))
+                                                    addMatch(elm, toks, syn, Seq.empty)
+
+                                        tryMap(
+                                            h.txtDirectSynonyms,
+                                            () ⇒ {
+                                                scan(h.notTxtDirectSynonyms)
+
+                                                if (!found)
+                                                    tryMap(h.txtNotDirectSynonyms, () ⇒ scan(h.notTxtNotDirectSynonyms))
+                                            }
+                                        )
                                     case None ⇒ // No-op.
                                 }
 
@@ -423,16 +440,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 if (seq == null)
                                     seq = convert(ns, collapsedSens, toks)
 
-                                for (comb ← seq) {
-                                    fastAccess(mdl.synonymsDsl, elm.getId, comb.length) match {
-                                        case Some(h) ⇒
-                                            require()
-                                        case None ⇒
-                                    }
-                                }
-                            }
-
-                                for (comb ← seq; syn ← fastAccess(mdl.synonymsDsl, elm.getId, comb.length) if !found)
+                                for (comb ← seq; syn ← fastAccessDls(mdl.synonymsDsl, elm.getId, comb.length) if !found)
                                     if (syn.isMatch(comb.map(_.data)))
                                         addMatch(elm, toks, syn, comb.filter(_.isToken).map(_.token))
                             }