You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/12 11:58:39 UTC
[incubator-nlpcraft] branch master updated: Minor performance improvements.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new dee0744 Minor performance improvements.
dee0744 is described below
commit dee0744d19e4836092299ac448ded8b6ef9c904d
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Mar 12 14:58:24 2021 +0300
Minor performance improvements.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 25 ++++++++++++----------
.../probe/mgrs/sentence/NCSentenceManager.scala | 11 +++++-----
.../model/NCEnricherNestedModelSpec4.scala | 2 +-
3 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 26821ca..b1b5075 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -373,11 +373,16 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
var permCnt = 0
- val collapsedSens =
- NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).map(_.asScala)
- val complexesWords = ns.map(Complex(_))
- val complexes =
- collapsedSens.
+ lazy val complexesWords = ns.map(Complex(_))
+ lazy val complexes =
+ NCProbeVariants.
+ convert(
+ ns.srvReqId,
+ mdl,
+ NCSentenceManager.collapse(mdl.model, ns.clone())
+ ).
+ map(_.asScala).
+ par.
flatMap(sen ⇒
// Tokens splitting.
// For example sentence "A B С D E" (5 words) processed as 3 tokens on first phase after collapsing
@@ -392,14 +397,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
sen.flatMap(t ⇒
// Single word token is not split as words - token.
// Partly (not strict in) token - word.
- if (senPartComb.contains(t) || t.wordIndexes.length == 1)
+ if (t.wordIndexes.length == 1 || senPartComb.contains(t))
Seq(Complex(t))
else
t.wordIndexes.map(complexesWords)
)
// Drops without tokens (DSL part works with tokens).
}).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
- )
+ ).seq
val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
@@ -433,10 +438,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
if (rec.nonEmpty)
Some(
rec ++
- (
- complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)
-
- ).map(complexesWords)
+ (complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).
+ map(complexesWords)
)
else
None
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 470776c..60b873b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -680,8 +680,9 @@ object NCSentenceManager extends NCService {
map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note }.toSet }.
toSeq.sortBy(-_.size)
- val sens =
+ val seqSens =
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool).asScala.map(_.asScala).
+ par.
flatMap(delComb ⇒ {
val nsClone = sen.clone()
@@ -693,7 +694,7 @@ object NCSentenceManager extends NCService {
require(!nsClone.exists(_.count(!_.isNlp) > 1))
collapse0(nsClone)
- })
+ }).seq
// It removes sentences which have only one difference - 'direct' flag of their user tokens.
// `Direct` sentences have higher priority.
@@ -702,7 +703,7 @@ object NCSentenceManager extends NCService {
val m = mutable.HashMap.empty[Key, Value]
- sens.map(sen ⇒ {
+ seqSens.par.map(sen ⇒ {
val notes = sen.flatten
val sysNotes = notes.filter(_.isSystem)
@@ -716,7 +717,7 @@ object NCSentenceManager extends NCService {
)
(Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
- }).
+ }).seq.
foreach { case (key, sen, directCnt) ⇒
m.get(key) match {
case Some(v) ⇒
@@ -734,7 +735,7 @@ object NCSentenceManager extends NCService {
sens = sens.distinct
- sens.foreach(sen ⇒
+ sens.par.foreach(sen ⇒
sen.foreach(tok ⇒
tok.size match {
case 1 ⇒ require(tok.head.isNlp, s"Unexpected non-'nlpcraft:nlp' token: $tok")
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
index b240a47..9c696c2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
@@ -49,5 +49,5 @@ class NCNestedTestModel4 extends NCModelAdapter(
@NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
class NCEnricherNestedModelSpec4 extends NCTestContext {
@Test
- def test(): Unit = checkIntent("the a " * 9, "onE2")
+ def test(): Unit = checkIntent("the a " * 11, "onE2")
}
\ No newline at end of file