You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/08/25 09:40:49 UTC

[incubator-nlpcraft] branch NLPCRAFT-41 updated (8296a9c -> 0edaea8)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 8296a9c  WIP.
     new fb0f5c7  WIP.
     new 0edaea8  WIP.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../nlpcraft/server/model/NCEnhanceManager.scala   | 116 +++++++++++++++++----
 .../nlpcraft/server/model/NCEnhanceType.scala      |   5 +-
 .../nlpcraft/server/rest/NCBasicRestApi.scala      |  17 +--
 3 files changed, 110 insertions(+), 28 deletions(-)

[incubator-nlpcraft] 02/02: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 0edaea86bfa52e56ea3a6acac0c1d43e33433ede
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Aug 25 12:40:40 2020 +0300

    WIP.
---
 .../nlpcraft/server/model/NCEnhanceManager.scala   | 92 +++++++++++++++++-----
 .../nlpcraft/server/model/NCEnhanceType.scala      |  3 +-
 .../nlpcraft/server/rest/NCBasicRestApi.scala      | 17 ++--
 3 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
index be74437..77495c7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
@@ -17,7 +17,7 @@
 
 package org.apache.nlpcraft.server.model
 
-import java.util.concurrent.atomic.AtomicInteger
+import java.util.concurrent.atomic.{AtomicInteger, AtomicReference}
 import java.util.concurrent.{ConcurrentHashMap, CopyOnWriteArrayList, CountDownLatch, TimeUnit}
 import java.util.{List ⇒ JList}
 
@@ -45,13 +45,19 @@ import scala.collection._
   * TODO:
   */
 object NCEnhanceManager extends NCService {
+    // 1. SUGGEST_SYNONYMS
     // For context word server requests.
-    private final val SUGGS_MAX_LIMIT: Int = 10000
-    private final val SUGGS_BATCH_SIZE = 20
+    private final val SUGGEST_SYNONYMS_MAX_LIMIT: Int = 10000
+    private final val SUGGEST_SYNONYMS_BATCH_SIZE = 20
 
     // For warnings.
-    private final val MIN_CNT_INTENT = 5
-    private final val MIN_CNT_MODEL = 20
+    private final val SUGGEST_SYNONYMS_MIN_CNT_INTENT = 5
+    private final val SUGGEST_SYNONYMS_MIN_CNT_MODEL = 20
+
+    // 2. VALIDATION_MACROS
+
+    // 3. VALIDATION_SYNONYMS
+    private final val VALIDATION_SYNONYMS_MANY_SYNS = 20000
 
     private object Config extends NCConfigurable {
         val urlOpt: Option[String] = getStringOpt("nlpcraft.server.ctxword.url")
@@ -139,12 +145,19 @@ object NCEnhanceManager extends NCService {
         NCEnhanceResponse(typ, resp.errors, resp.warnings, resp.suggestions)
 
     /**
+      *
+      * @param seq
+      * @return
+      */
+    private def norm(seq: Seq[String]): Option[Seq[String]] = if (seq.isEmpty) None else Some(seq)
+
+    /**
       * @param mdlId Model ID.
       * @param parent Parent.
       */
     @throws[NCE]
-    private def suggest(mdlId: String, parent: Span = null): Response =
-        startScopedSpan("suggest", parent, "modelId" → mdlId) { _ ⇒
+    private def suggestSynonyms(mdlId: String, parent: Span = null): Response =
+        startScopedSpan("suggestSynonyms", parent, "modelId" → mdlId) { _ ⇒
             val url = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured"))}/suggestions"
 
             val mdl = NCProbeManager.getModel(mdlId)
@@ -157,17 +170,17 @@ object NCEnhanceManager extends NCService {
 
             val warns = mutable.ArrayBuffer.empty[String]
 
-            if (allSamplesCnt < MIN_CNT_MODEL)
+            if (allSamplesCnt < SUGGEST_SYNONYMS_MIN_CNT_MODEL)
                 // TODO: text
                 warns +=
                     s"Model: '$mdlId' has too small intents samples count: $allSamplesCnt. " +
                     s"Potentially is can be not enough for suggestions service high quality work. " +
-                    s"Try to increase their count at least to $MIN_CNT_MODEL."
+                    s"Try to increase their count at least to $SUGGEST_SYNONYMS_MIN_CNT_MODEL."
 
             else {
                 val ids =
                     mdl.intentsSamples.
-                        filter { case (_, samples) ⇒ samples.size < MIN_CNT_INTENT }.
+                        filter { case (_, samples) ⇒ samples.size < SUGGEST_SYNONYMS_MIN_CNT_INTENT }.
                         map { case (intentId, _) ⇒ intentId }
 
                 if (ids.nonEmpty)
@@ -175,7 +188,7 @@ object NCEnhanceManager extends NCService {
                         // TODO: text
                         s"Models '$mdlId' has intents: [${ids.mkString(", ")}] with too small intents samples count." +
                             s"Potentially it can be not enough for suggestions service high quality work. " +
-                            s"Try to increase their count at least to $MIN_CNT_INTENT."
+                            s"Try to increase their count at least to $SUGGEST_SYNONYMS_MIN_CNT_INTENT."
             }
 
             val parser = new NCMacroParser()
@@ -258,8 +271,9 @@ object NCEnhanceManager extends NCService {
             val cnt = new AtomicInteger(0)
 
             val client = HttpClients.createDefault
+            val err = new AtomicReference[Throwable]()
 
-            for ((elemId, reqs) ← allReqs; batch ← reqs.sliding(SUGGS_BATCH_SIZE, SUGGS_BATCH_SIZE).map(_.toSeq)) {
+            for ((elemId, reqs) ← allReqs; batch ← reqs.sliding(SUGGEST_SYNONYMS_BATCH_SIZE, SUGGEST_SYNONYMS_BATCH_SIZE).map(_.toSeq)) {
                 NCUtils.asFuture(
                     _ ⇒ {
                         val post = new HttpPost(url)
@@ -274,7 +288,7 @@ object NCEnhanceManager extends NCService {
                                         // ContextWord server range is (0, 2), input range is (0, 1)
                                         min_score = Config.suggestionsMinScore * 2,
                                         // We set big limit value and in fact only minimal score is taken into account.
-                                        limit = SUGGS_MAX_LIMIT
+                                        limit = SUGGEST_SYNONYMS_MAX_LIMIT
                                     )
                                 ),
                                 "UTF-8"
@@ -303,7 +317,7 @@ object NCEnhanceManager extends NCService {
                             cdl.countDown()
                     },
                     (e: Throwable) ⇒ {
-                        logger.error("Error execution request", e)
+                        err.compareAndSet(null, e)
 
                         cdl.countDown()
                     },
@@ -313,6 +327,9 @@ object NCEnhanceManager extends NCService {
 
             cdl.await(Long.MaxValue, TimeUnit.MILLISECONDS)
 
+            if (err.get() != null)
+                throw new NCE("Error during work with ContextWord Server", err.get())
+
             val allSynsStems = elemSyns.flatMap(_._2).flatten.map(_.stem).toSet
 
             val nonEmptySuggs = allSuggs.asScala.map(p ⇒ p._1 → p._2.asScala).filter(_._2.nonEmpty)
@@ -375,13 +392,13 @@ object NCEnhanceManager extends NCService {
             })
 
             Response(
-                warnings = if (warns.isEmpty) None else Some(warns),
+                warnings = norm(warns),
                 suggestions = Some(res.map(p ⇒ p._1 → p._2.asJava).asJava)
             )
         }
 
-    private def checkMacros(mdlId: String, parent: Span = null): Response =
-        startScopedSpan("suggest", parent, "modelId" → mdlId) { _ ⇒
+    private def validateMacros(mdlId: String, parent: Span = null): Response =
+        startScopedSpan("validateMacros", parent, "modelId" → mdlId) { _ ⇒
             val mdl = NCProbeManager.getModel(mdlId)
             val syns = mdl.elementsSynonyms.values.flatten
 
@@ -390,17 +407,52 @@ object NCEnhanceManager extends NCService {
                 flatMap(m ⇒ if (syns.exists(_.contains(m))) None else Some(s"Macro is not used: $m")).
                 toSeq
 
-            Response(warnings = if (warns.isEmpty) None else Some(warns))
+            Response(warnings = norm(warns))
     }
 
+
+    private def validateSynonyms(mdlId: String, parent: Span = null): Response =
+        startScopedSpan("validateSynonyms", parent, "modelId" → mdlId) { _ ⇒
+            val warns = mutable.ArrayBuffer.empty[String]
+
+            val mdl = NCProbeManager.getModel(mdlId)
+
+            val parser = new NCMacroParser()
+
+            mdl.macros.foreach { case (name, str) ⇒ parser.addMacro(name, str) }
+
+
+            val mdlSyns: Map[String, Seq[String]] =
+                mdl.elementsSynonyms.map { case (elemId, syns) ⇒ elemId → syns.flatMap(parser.expand) }
+
+            mdlSyns.foreach { case (elemId, syns) ⇒
+                val size = syns.size
+
+                if (size == 0)
+                    warns += s"Element: '$elemId' doesn't have synonyms"
+                else if (size > VALIDATION_SYNONYMS_MANY_SYNS)
+                    warns += s"Element: '$elemId' have too many synonyms: $size"
+
+                val others = mdlSyns.filter { case (othId, _) ⇒ othId != elemId}
+
+                val intersects = others.filter { case (_, othSyns) ⇒ othSyns.intersect(syns).nonEmpty }.keys.mkString(",")
+
+                if (intersects.nonEmpty)
+                    warns += s"Element: '$elemId' has same synonyms with '$intersects'"
+            }
+
+            Response(warnings = norm(warns))
+        }
+
     @throws[NCE]
     def enhance(mdlId: String, types: Seq[NCEnhanceType], parent: Span = null): Seq[NCEnhanceResponse] =
         startScopedSpan("enhance", parent, "modelId" → mdlId) { _ ⇒
             // Note that NCEnhanceResponse#suggestions should be simple types or java collections.
             // Scala collections cannot be simple converted into JSON (REST calls)
             types.map {
-                case t@ELEMENTS_SYNONYMS ⇒ convert(t, suggest(mdlId, parent))
-                case t@VALIDATION_MACROS ⇒ convert(t, checkMacros(mdlId, parent))
+                case t@SUGGEST_SYNONYMS ⇒ convert(t, suggestSynonyms(mdlId, parent))
+                case t@VALIDATION_MACROS ⇒ convert(t, validateMacros(mdlId, parent))
+                case t@VALIDATION_SYNONYMS ⇒ convert(t, validateSynonyms(mdlId, parent))
             }
         }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
index 4bacc20..a549e00 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
@@ -20,6 +20,7 @@ package org.apache.nlpcraft.server.model
 object NCEnhanceType extends Enumeration {
     type NCEnhanceType = Value
 
-    val ELEMENTS_SYNONYMS = Value
+    val SUGGEST_SYNONYMS = Value
     val VALIDATION_MACROS = Value
+    val VALIDATION_SYNONYMS = Value
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
index 2347807..5785f30 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
@@ -642,13 +642,16 @@ class NCBasicRestApi extends NCRestApi with LazyLogging with NCOpenCensusTrace w
                 checkLength("mdlId", req.mdlId, 32)
 
                 val types =
-                    req.types.map(typ ⇒
-                        try
-                            NCEnhanceType.withName(typ)
-                        catch {
-                            case _: Exception ⇒ throw InvalidField("types")
-                        }
-                    )
+                    if (req.types.size == 1 && req.types.head.toLowerCase == "all")
+                        NCEnhanceType.values.toSeq
+                    else
+                        req.types.map(typ ⇒
+                            try
+                                NCEnhanceType.withName(typ.toUpperCase)
+                            catch {
+                                case _: Exception ⇒ throw InvalidField("types")
+                            }
+                        )
 
                 val admin = authenticateAsAdmin(req.acsTok)

[incubator-nlpcraft] 01/02: WIP.

Posted by se...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit fb0f5c7cc6a3748f550b0aa901de12d4de23b650
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Tue Aug 25 10:43:02 2020 +0300

    WIP.
---
 .../nlpcraft/server/model/NCEnhanceManager.scala   | 36 +++++++++++++++++++---
 .../nlpcraft/server/model/NCEnhanceType.scala      |  2 +-
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
index 9aad487..be74437 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
@@ -81,6 +81,12 @@ object NCEnhanceManager extends NCService {
         )
     }
 
+    case class Response(
+        errors: Option[Seq[String]] = None,
+        warnings: Option[Seq[String]] = None,
+        suggestions: Option[AnyRef] = None
+    )
+
     private final val GSON = new Gson
     private final val TYPE_RESP = new TypeToken[JList[JList[Suggestion]]]() {}.getType
     private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
@@ -125,11 +131,19 @@ object NCEnhanceManager extends NCService {
     }
 
     /**
+      *
+      * @param typ
+      * @param resp
+      */
+    private def convert(typ: NCEnhanceType, resp: Response): NCEnhanceResponse =
+        NCEnhanceResponse(typ, resp.errors, resp.warnings, resp.suggestions)
+
+    /**
       * @param mdlId Model ID.
       * @param parent Parent.
       */
     @throws[NCE]
-    private def suggest(mdlId: String, parent: Span = null): NCEnhanceResponse =
+    private def suggest(mdlId: String, parent: Span = null): Response =
         startScopedSpan("suggest", parent, "modelId" → mdlId) { _ ⇒
             val url = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured"))}/suggestions"
 
@@ -360,21 +374,33 @@ object NCEnhanceManager extends NCService {
                 }
             })
 
-            NCEnhanceResponse(
-                ELEMENTS_SYNONYMS,
+            Response(
                 warnings = if (warns.isEmpty) None else Some(warns),
                 suggestions = Some(res.map(p ⇒ p._1 → p._2.asJava).asJava)
             )
         }
 
+    private def checkMacros(mdlId: String, parent: Span = null): Response =
+        startScopedSpan("suggest", parent, "modelId" → mdlId) { _ ⇒
+            val mdl = NCProbeManager.getModel(mdlId)
+            val syns = mdl.elementsSynonyms.values.flatten
+
+            // TODO: is it valid?
+            val warns = mdl.macros.keys.
+                flatMap(m ⇒ if (syns.exists(_.contains(m))) None else Some(s"Macro is not used: $m")).
+                toSeq
+
+            Response(warnings = if (warns.isEmpty) None else Some(warns))
+    }
+
     @throws[NCE]
     def enhance(mdlId: String, types: Seq[NCEnhanceType], parent: Span = null): Seq[NCEnhanceResponse] =
         startScopedSpan("enhance", parent, "modelId" → mdlId) { _ ⇒
             // Note that NCEnhanceResponse#suggestions should be simple types or java collections.
             // Scala collections cannot be simple converted into JSON (REST calls)
             types.map {
-                case typ@ELEMENTS_SYNONYMS ⇒ suggest(mdlId, parent)
-                case typ@VALIDATION_ELEMENTS ⇒ NCEnhanceResponse(typ, null)
+                case t@ELEMENTS_SYNONYMS ⇒ convert(t, suggest(mdlId, parent))
+                case t@VALIDATION_MACROS ⇒ convert(t, checkMacros(mdlId, parent))
             }
         }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
index 0aba13d..4bacc20 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceType.scala
@@ -21,5 +21,5 @@ object NCEnhanceType extends Enumeration {
     type NCEnhanceType = Value
 
     val ELEMENTS_SYNONYMS = Value
-    val VALIDATION_ELEMENTS = Value
+    val VALIDATION_MACROS = Value
 }