You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2021/03/10 00:37:45 UTC

[incubator-nlpcraft] 02/17: WIP.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-261
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit e2d36c4a39f22de3e87b7e43f756a4563ab13358
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Mar 1 19:08:35 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       | 24 +++++++--------
 .../probe/mgrs/deploy/NCDeployManager.scala        |  2 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 34 +++++++++-------------
 3 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 6ed8f44..b8b7dc6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -69,11 +69,10 @@ class NCProbeSynonym(
     def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
         require(toks != null)
 
-        val ok =
+        if (toks.length == length) {
             if (isTextOnly)
                 toks.stemsHash == stemsHash && toks.stems == stems
             else
-                // Same length.
                 toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
                     case (tok, chunk) ⇒
                         chunk.kind match {
@@ -83,9 +82,9 @@ class NCProbeSynonym(
                             case _ ⇒ throw new AssertionError()
                         }
                 }
-
-        // Should be called only for valid tokens count (validation optimized for performance reasons)
-        ok && toks.length == length
+        }
+        else
+            false
     }
 
     /**
@@ -100,27 +99,26 @@ class NCProbeSynonym(
         type Word = NCNlpSentenceToken
         type TokenOrWord = Either[Token, Word]
 
-        val ok =
-            // Same length.
+        if (tows.length == length && tows.count(_.isLeft) >= dslChunks)
             tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
                 case (tow, chunk) ⇒
                     def get0[T](fromToken: Token ⇒ T, fromWord: Word ⇒ T): T =
                         if (tow.isLeft) fromToken(tow.left.get) else fromWord(tow.right.get)
 
                     chunk.kind match {
-                        case TEXT ⇒ chunk.wordStem == get0((t: Token) ⇒ t.stem, (w: Word) ⇒ w.stem)
+                        case TEXT ⇒ chunk.wordStem == get0(_.stem, _.stem)
                         case REGEX ⇒
                             val r = chunk.regex
 
-                            r.matcher(get0((t: Token) ⇒ t.origText, (w: Word) ⇒ w.origText)).matches() ||
-                            r.matcher(get0((t: Token) ⇒ t.normText, (w: Word) ⇒ w.normText)).matches()
-                        case DSL ⇒ get0((t: Token) ⇒ chunk.dslPred.apply(t), (_: Word) ⇒ false)
+                            r.matcher(get0(_.origText, _.origText)).matches() ||
+                            r.matcher(get0(_.normText, _.normText)).matches()
+                        case DSL ⇒ get0(t ⇒ chunk.dslPred.apply(t), _ ⇒ false)
 
                         case _ ⇒ throw new AssertionError()
                     }
             }
-        // Should be called only for valid tokens count (validation optimized for performance reasons)
-        ok && tows.length == length
+        else
+            false
     }
     
     override def toString(): String = mkString(" ")
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index a68e305..21eaaab 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -503,7 +503,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
             solver = solver,
             intents = intents.keySet.toSeq,
             synonyms = mkFastAccessMap(filter(syns, dsl = false), NCProbeSynonymsWrapper(_)),
-            synonymsDsl = mkFastAccessMap(filter(syns, dsl = true), seq ⇒ seq),
+            synonymsDsl = mkFastAccessMap(filter(syns, dsl = true), _.sorted.reverse),
             addStopWordsStems = addStopWords.toSet,
             exclStopWordsStems = exclStopWords.toSet,
             suspWordsStems = suspWords.toSet,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 8ecb39d..2a9dec0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -263,7 +263,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param toks
       * @return
       */
-    protected def combos[T](toks: Seq[T]): Seq[Seq[T]] =
+    private def combos[T](toks: Seq[T]): Seq[Seq[T]] =
         (for (n ← toks.size until 0 by -1) yield toks.sliding(n)).flatten.map(p ⇒ p)
 
     /**
@@ -296,7 +296,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     varToks.flatMap(t ⇒
                         // Single word token is not split as words - token.
                         // Partly (not strict in) token - word.
-                        if ((toksComb.contains(t) || isSingleWord(t)) && inStrict(t))
+                        if (inStrict(t) && (toksComb.contains(t) || isSingleWord(t)))
                             Seq(Complex(Left(t)))
                         else
                             t.wordIndexes.filter(nlpWordIdxs.contains).map(i ⇒ Complex(Right(initialSen(i))))
@@ -355,7 +355,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
 
             var permCnt = 0
-            var collapsedSens: Seq[Seq[NCToken]] = null
+            lazy val collapsedSens = NCProbeVariants.convert(ns.srvReqId, mdl, ns.clone().collapse(mdl.model)).map(_.asScala)
 
             /**
               *
@@ -366,10 +366,12 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                 for (toks ← combos(perm)) {
                     val key = toks.map(_.index).sorted
-                    val sparsity = U.calcSparsity(key)
 
                     if (!cache.contains(key)) {
-                        var seq: Seq[Seq[Complex]] = null
+                        cache += key
+
+                        lazy val dslCombs = convert(ns, collapsedSens, toks).groupBy(_.length)
+                        lazy val sparsity = U.calcSparsity(key)
 
                         // Attempt to match each element.
                         for (elm ← mdl.elements.values if !alreadyMarked(toks, elm.getId)) {
@@ -426,31 +428,21 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             if (mdl.synonymsDsl.nonEmpty) {
                                 found = false
 
-                                if (collapsedSens == null)
-                                    collapsedSens =
-                                        NCProbeVariants.
-                                            convert(ns.srvReqId, mdl, ns.clone().collapse(mdl.model)).map(_.asScala)
-
-                                if (seq == null)
-                                    seq = convert(ns, collapsedSens, toks)
-
                                 for (
-                                    comb ← seq;
-                                    syn ← fastAccess(mdl.synonymsDsl, elm.getId, comb.length).getOrElse(Seq.empty)
-                                    if !found
+                                    (len, seq) ← dslCombs;
+                                    syn ← fastAccess(mdl.synonymsDsl, elm.getId, len).getOrElse(Seq.empty);
+                                    comb ← seq if !found;
+                                    data = comb.map(_.data)
                                 )
-                                    if (syn.isMatch(comb.map(_.data))) {
+                                    if (syn.isMatch(data)) {
                                         val parts = comb.zip(syn.map(_.kind)).flatMap {
-                                            case (complex, kind) ⇒
-                                                if (complex.isToken) Some(complex.token → kind) else None
+                                            case (complex, kind) ⇒ if (complex.isToken) Some(complex.token → kind) else None
                                         }
 
                                         addMatch(elm, toks, syn, parts)
                                     }
                             }
                         }
-
-                        cache += key
                     }
                 }
             }