You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/22 13:29:47 UTC

[incubator-nlpcraft] branch NLPCRAFT-249 created (now 71f7e1c)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-249
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 71f7e1c  WIP.

This branch includes the following new commits:

     new 71f7e1c  WIP.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-nlpcraft] 01/01: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-249
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 71f7e1c0fa7ada8bcb6b2f80e33cfd36483eb180
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Mon Feb 22 16:29:22 2021 +0300

    WIP.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 31 ++++++-
 .../nlpcraft/model/jiggle/NCJiggleSpec.scala       | 99 ++++++++++++++++++++++
 2 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 28fcd21..d7ea084 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -431,10 +431,37 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 "totalJiggledPerms" → permCnt
             )
 
-            val matchCnt = matches.size
+            // Groups found matches by element ID, match length and token.
+            // Within each group, all combinations except the one with the lowest sparsity sum are dropped.
+            // Example:
+            // 1. Element's synonym is 'a b', jiggle factor is 4 (default), isPermutateSynonyms is 'true' (default).
+            // 2. Request is 'a b a b'.
+            // Initially found matches: 0-1, 1-2, 2-3, 0-3.
+            // 0-3 will be dropped because better variants of the same element with the same length were found for tokens 0 and 3.
+            val matchesNorm =
+                matches.
+                flatMap(m ⇒ m.tokens.map(_ → m)).
+                groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
+                flatMap { case (_, seq) ⇒
+                    def perm[T](list: List[List[T]]): List[List[T]] =
+                        list match {
+                            case Nil ⇒ List(Nil)
+                            case head :: tail ⇒ for (n ← head; t ← perm(tail)) yield n :: t
+                        }
+
+                    // Picks the combination with the minimal sparsity sum among matches of one element found with the same token count.
+                    perm(
+                        seq.groupBy { case (tok, _) ⇒ tok }.
+                        map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ m} .toList }.toList
+                    ).minBy(_.map(_.sparsity).sum)
+                }.
+                toSeq.
+                distinct
+
+            val matchCnt = matchesNorm.size
 
             // Add notes for all remaining (non-intersecting) matches.
-            for ((m, idx) ← matches.zipWithIndex) {
+            for ((m, idx) ← matchesNorm.zipWithIndex) {
                 if (DEEP_DEBUG)
                     logger.trace(
                         s"Model '${mdl.model.getId}' element found (${idx + 1} of $matchCnt) [" +
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
new file mode 100644
index 0000000..24ca12c
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.jiggle
+
+import org.apache.nlpcraft.model.`abstract`.NCAbstractTokensModel
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCResult, NCToken}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+class NJiggleModel1 extends NCAbstractTokensModel {
+    override def getElements: util.Set[NCElement] = Set(NCTestElement("xyz", "x y z"))
+
+    // Default values.
+    override def isPermutateSynonyms: Boolean = true
+    override def getJiggleFactor: Int = 4
+
+    override def onContext(ctx: NCContext): NCResult = {
+        val variants = ctx.getVariants.asScala
+
+        def checkOneVariant(sparsity: Int): Unit = {
+            require(variants.size == 1)
+
+            val toks = variants.head.asScala.filter(_.getId == "xyz")
+
+            require(toks.size == 3)
+
+            checkSparsity(sparsity, toks)
+        }
+
+        def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
+            require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int] == sparsity))
+
+        def checkExists(sparsity: Int): Unit = {
+            require(
+                variants.exists(v ⇒ {
+                    val toks = v.asScala.filter(_.getId == "xyz")
+
+                    toks.size match {
+                        case 3 ⇒
+                            checkSparsity(sparsity, toks)
+
+                            true
+                        case _ ⇒ false
+                    }
+                })
+            )
+        }
+
+        ctx.getRequest.getNormalizedText match {
+            case "x y z x y z x y z" ⇒ checkOneVariant(0)
+            case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
+            case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
+            case "x z y x z y x z y" ⇒ checkExists(0)
+            case "x z y test x z y test x z y test" ⇒ checkExists(0)
+            case "x test z y x test z y x test z y" ⇒ checkExists(1)
+
+            case _ ⇒ throw new AssertionError(s"Unexpected request: ${ctx.getRequest.getNormalizedText}")
+        }
+
+        NCResult.text("OK")
+    }
+
+}
+
+@NCTestEnvironment(model = classOf[NJiggleModel1], startClient = true)
+class NCJiggleSpec1 extends NCTestContext {
+    @Test
+    def test(): Unit = {
+        checkResult("x y z x y z x y z", "OK")
+        checkResult("x y z test x y z test x y z test", "OK")
+        checkResult("x test y z x test y z x y test z", "OK")
+
+        // We don't check sparsity > 1 because of the synonym permutation logic (neighbors only).
+        // Such tests would not be clear.
+
+        checkResult("x z y x z y x z y", "OK")
+        checkResult("x z y test x z y test x z y test", "OK")
+        checkResult("x test z y x test z y x test z y", "OK")
+    }
+}
\ No newline at end of file