You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/12 11:58:39 UTC

[incubator-nlpcraft] branch master updated: Minor performance improvements.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new dee0744  Minor performance improvements.
dee0744 is described below

commit dee0744d19e4836092299ac448ded8b6ef9c904d
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Mar 12 14:58:24 2021 +0300

    Minor performance improvements.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 25 ++++++++++++----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 11 +++++-----
 .../model/NCEnricherNestedModelSpec4.scala         |  2 +-
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 26821ca..b1b5075 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -373,11 +373,16 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
             var permCnt = 0
 
-            val collapsedSens =
-                NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).map(_.asScala)
-            val complexesWords = ns.map(Complex(_))
-            val complexes =
-                collapsedSens.
+            lazy val complexesWords = ns.map(Complex(_))
+            lazy val complexes =
+                NCProbeVariants.
+                    convert(
+                        ns.srvReqId,
+                        mdl,
+                        NCSentenceManager.collapse(mdl.model, ns.clone())
+                    ).
+                    map(_.asScala).
+                    par.
                     flatMap(sen ⇒
                         // Tokens splitting.
                         // For example sentence "A B С D E" (5 words) processed as 3 tokens on first phase after collapsing
@@ -392,14 +397,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 sen.flatMap(t ⇒
                                     // Single word token is not split as words - token.
                                     // Partly (not strict in) token - word.
-                                    if (senPartComb.contains(t) || t.wordIndexes.length == 1)
+                                    if (t.wordIndexes.length == 1 || senPartComb.contains(t))
                                         Seq(Complex(t))
                                     else
                                         t.wordIndexes.map(complexesWords)
                                 )
                                 // Drops without tokens (DSL part works with tokens).
                             }).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
-                    )
+                    ).seq
 
             val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
 
@@ -433,10 +438,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                     if (rec.nonEmpty)
                                         Some(
                                             rec ++
-                                                (
-                                                    complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)
-
-                                                ).map(complexesWords)
+                                                (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).
+                                                    map(complexesWords)
                                         )
                                     else
                                         None
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 470776c..60b873b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -680,8 +680,9 @@ object NCSentenceManager extends NCService {
                         map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
                         toSeq.sortBy(-_.size)
 
-                val sens =
+                val seqSens =
                     NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala).
+                        par.
                         flatMap(delComb ⇒ {
                             val nsClone = sen.clone()
 
@@ -693,7 +694,7 @@ object NCSentenceManager extends NCService {
                             require(!nsClone.exists(_.count(!_.isNlp) > 1))
 
                             collapse0(nsClone)
-                        })
+                        }).seq
 
                 // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                 // `Direct` sentences have higher priority.
@@ -702,7 +703,7 @@ object NCSentenceManager extends NCService {
 
                 val m = mutable.HashMap.empty[Key, Value]
 
-                sens.map(sen ⇒ {
+                seqSens.par.map(sen ⇒ {
                     val notes = sen.flatten
 
                     val sysNotes = notes.filter(_.isSystem)
@@ -716,7 +717,7 @@ object NCSentenceManager extends NCService {
                         )
 
                     (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
-                }).
+                }).seq.
                     foreach { case (key, sen, directCnt) ⇒
                         m.get(key) match {
                             case Some(v) ⇒
@@ -734,7 +735,7 @@ object NCSentenceManager extends NCService {
 
         sens = sens.distinct
 
-        sens.foreach(sen ⇒
+        sens.par.foreach(sen ⇒
             sen.foreach(tok ⇒
                 tok.size match {
                     case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
index b240a47..9c696c2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -49,5 +49,5 @@ class NCNestedTestModel4 extends NCModelAdapter(
 @NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
 class NCEnricherNestedModelSpec4 extends NCTestContext {
     @Test
-    def test(): Unit = checkIntent("the a " * 9, "onE2")
+    def test(): Unit = checkIntent("the a " * 11, "onE2")
 }
\ No newline at end of file