You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/26 06:59:33 UTC

[incubator-nlpcraft] branch NLPCRAFT-253 updated (341f57a -> 25ab72b)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 341f57a  WIP.
     new cff997c  WIP.
     add f56b70e  WIP.
     add ec2c254  Fixed Javadoc issues for the mixed Scala/Java project.
     add 0f5ee6c  Probe sentences variants processing bugfixes.
     new 25ab72b  Merge branch 'master' into NLPCRAFT-253

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlpcraft/common/makro/NCMacroJavaParser.java   |  77 +++++
 .../common/makro/NCMacroJavaParserTrait.java       |  59 ++++
 .../nlpcraft/common/makro/NCMacroParser.scala      |  11 +-
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 120 ++++---
 .../apache/nlpcraft/model/NCMacroProcessor.java    | 101 ++++++
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   2 +-
 .../probe/mgrs/NCProbeSynonymsWrapper.scala        |  38 +--
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      | 375 ++++++++++++---------
 .../probe/mgrs/deploy/NCDeployManager.scala        |   6 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 +
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  50 +--
 .../nlpcraft/common/makro/NCMacroParserSpec.scala  |  20 +-
 .../abstract/NCAbstractTokensVariantsSpec.scala    |  20 +-
 ...pec3.scala => NCEnricherNestedModelSpec2.scala} |  48 +--
 .../model/NCEnricherNestedModelSpec3.scala         |  12 +-
 15 files changed, 633 insertions(+), 308 deletions(-)
 create mode 100644 nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroJavaParser.java
 create mode 100644 nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroJavaParserTrait.java
 create mode 100644 nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCMacroProcessor.java
 copy nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/{NCEnricherNestedModelSpec3.scala => NCEnricherNestedModelSpec2.scala} (54%)


[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit cff997c169c92ac98b5f64422bb3129e87f143a0
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Thu Feb 25 22:15:37 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |  2 +-
 .../probe/mgrs/NCProbeSynonymsWrapper.scala        | 38 ++++++++--------
 .../probe/mgrs/deploy/NCDeployManager.scala        |  6 +--
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 50 +++++++++++++---------
 4 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index acc2021..155ffb5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -40,7 +40,7 @@ case class NCProbeModel(
     solver: NCIntentSolver,
     intents: Seq[NCDslIntent],
     synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
-    synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]], // Fast access map.
+    synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]], // Fast access map.
     addStopWordsStems: Set[String],
     exclStopWordsStems: Set[String],
     suspWordsStems: Set[String],
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
index 22520be..bb2362d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonymsWrapper.scala
@@ -17,32 +17,28 @@
 
 package org.apache.nlpcraft.probe.mgrs
 
-import scala.collection.{Map, Seq}
-
 /**
-  * Synonyms sequence holder with optimized access if all synoyms have `text` type.
+  * Synonyms sequence holder.
   *
-  * @param isTextOnly
-  * @param synonyms
-  * @param synonymsByStems
   */
-class NCProbeSynonymsWrapper(
-    val isTextOnly: Boolean,
-    val synonyms: Seq[NCProbeSynonym],
-    val synonymsByStems: Map[String, NCProbeSynonym]
-) {
-    require(synonyms.isEmpty || synonymsByStems.isEmpty)
-
-    val count: Int = synonyms.size + synonymsByStems.size
-}
-
+case class NCProbeSynonymsWrapper(
+    textDirectSynonyms: Map[String, NCProbeSynonym],
+    textNotDirectSynonyms: Map[String, NCProbeSynonym],
+    otherDirectSynonyms: Seq[NCProbeSynonym],
+    otherNotDirectSynonyms: Seq[NCProbeSynonym],
+    count: Int
+)
 object NCProbeSynonymsWrapper {
     def apply(syns: Seq[NCProbeSynonym]): NCProbeSynonymsWrapper = {
-        val isTextOnly = syns.forall(_.isTextOnly)
+        val txtSyns = syns.filter(_.isTextOnly)
+        val other = syns.filter(!_.isTextOnly).sorted.reverse
 
-        if (isTextOnly)
-            new NCProbeSynonymsWrapper(isTextOnly, Seq.empty, syns.map(s ⇒ s.stems → s).toMap)
-        else
-            new NCProbeSynonymsWrapper(isTextOnly, syns, Map.empty)
+        NCProbeSynonymsWrapper(
+            textDirectSynonyms = txtSyns.filter(_.isDirect).map(s ⇒ s.stems → s).toMap,
+            textNotDirectSynonyms = txtSyns.filter(!_.isDirect).map(s ⇒ s.stems → s).toMap,
+            otherDirectSynonyms = other.filter(_.isDirect),
+            otherNotDirectSynonyms = other.filter(!_.isDirect),
+            count = syns.size
+        )
     }
 }
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index ee8e464..136f221 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -502,7 +502,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
             model = mdl,
             solver = solver,
             intents = intents.keySet.toSeq,
-            synonyms = mkFastAccessMap(filter(syns, dsl = false)).map(p ⇒ p._1 → p._2.map(p ⇒ p._1 → NCProbeSynonymsWrapper(p._2))),
+                synonyms = mkFastAccessMap(filter(syns, dsl = false)),
             synonymsDsl = mkFastAccessMap(filter(syns, dsl = true)),
             addStopWordsStems = addStopWords.toSet,
             exclStopWordsStems = exclStopWords.toSet,
@@ -550,7 +550,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
       * @param set
       * @return
       */
-    private def mkFastAccessMap(set: Set[SynonymHolder]): Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]] =
+    private def mkFastAccessMap(set: Set[SynonymHolder]): Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]] =
         set
             .groupBy(_.elmId)
             .map {
@@ -561,7 +561,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
                         .groupBy(_.size)
                         .map {
                             // Sort synonyms from most important to least important.
-                            case (k, v) ⇒ (k, v.toSeq.sorted.reverse)
+                            case (k, v) ⇒ (k, NCProbeSynonymsWrapper(v.toSeq))
                         }
                 )
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 9d7b05e..b7e7a5d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -336,19 +336,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
               * @param len
               * @return
               */
-            def fastAccessDsl(
-                fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , Seq[NCProbeSynonym]]],
-                elmId: String,
-                len: Int): Seq[NCProbeSynonym] =
-                fastMap.get(elmId).flatMap(_.get(len)) match {
-                    case Some(seq) ⇒ seq
-                    case None ⇒ Seq.empty[NCProbeSynonym]
-                }
-
             def fastAccess(
                 fastMap: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ , NCProbeSynonymsWrapper]],
                 elmId: String,
-                len: Int): Option[NCProbeSynonymsWrapper] =
+                len: Int
+            ): Option[NCProbeSynonymsWrapper] =
                 fastMap.get(elmId) match {
                     case Some(m) ⇒ m.get(len)
                     case None ⇒ None
@@ -399,15 +391,24 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
                                 fastAccess(mdl.synonyms, elm.getId, toks.length) match {
                                     case Some(h) ⇒
-                                        if (h.isTextOnly)
-                                            h.synonymsByStems.get(toks.map(_.stem).mkString(" ")) match {
-                                                case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
-                                                case None ⇒ // No-op.
-                                            }
-                                        else
-                                            for (syn ← h.synonyms if !found)
-                                                if (syn.isMatch(toks))
-                                                    addMatch(elm, toks, syn, Seq.empty)
+                                        val stems = toks.map(_.stem).mkString(" ")
+
+                                        h.textDirectSynonyms.get(stems) match {
+                                            case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
+                                            case None ⇒
+                                                for (syn ← h.otherDirectSynonyms if !found)
+                                                    if (syn.isMatch(toks))
+                                                        addMatch(elm, toks, syn, Seq.empty)
+
+                                                if (!found) {
+                                                    h.textNotDirectSynonyms.get(stems) match {
+                                                        case Some(syn) ⇒ addMatch(elm, toks, syn, Seq.empty)
+                                                        case None ⇒
+                                                            for (syn ← h.otherNotDirectSynonyms if !found)
+                                                                if (syn.isMatch(toks))
+                                                                    addMatch(elm, toks, syn, Seq.empty)
+                                                    }
+                                        }
                                     case None ⇒ // No-op.
                                 }
 
@@ -422,7 +423,16 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 if (seq == null)
                                     seq = convert(ns, collapsedSens, toks)
 
-                                for (comb ← seq; syn ← fastAccessDsl(mdl.synonymsDsl, elm.getId, comb.length) if !found)
+                                for (comb ← seq) {
+                                    fastAccess(mdl.synonymsDsl, elm.getId, comb.length) match {
+                                        case Some(h) ⇒
+                                            require()
+                                        case None ⇒
+                                    }
+                                }
+                            }
+
+                                for (comb ← seq; syn ← fastAccess(mdl.synonymsDsl, elm.getId, comb.length) if !found)
                                     if (syn.isMatch(comb.map(_.data)))
                                         addMatch(elm, toks, syn, comb.filter(_.isToken).map(_.token))
                             }


[incubator-nlpcraft] 02/02: Merge branch 'master' into NLPCRAFT-253

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-253
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 25ab72bff595b374f3088f0b86684622ece26107
Merge: cff997c 0f5ee6c
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Feb 26 09:59:12 2021 +0300

    Merge branch 'master' into NLPCRAFT-253

 .../nlpcraft/common/makro/NCMacroJavaParser.java   |  77 +++++
 .../common/makro/NCMacroJavaParserTrait.java       |  59 ++++
 .../nlpcraft/common/makro/NCMacroParser.scala      |  11 +-
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 120 ++++---
 .../apache/nlpcraft/model/NCMacroProcessor.java    | 101 ++++++
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      | 375 ++++++++++++---------
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   2 +
 .../nlpcraft/common/makro/NCMacroParserSpec.scala  |  20 +-
 .../abstract/NCAbstractTokensVariantsSpec.scala    |  20 +-
 ...pec3.scala => NCEnricherNestedModelSpec2.scala} |  48 +--
 .../model/NCEnricherNestedModelSpec3.scala         |  12 +-
 11 files changed, 582 insertions(+), 263 deletions(-)