You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/12 14:39:17 UTC

[incubator-nlpcraft] 01/01: Similar sentences (with different 'direct' synonyms property) processing fixes.

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-268
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 8af9171647bca771d48f68f943d57658900f6d0c
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Mar 12 17:39:03 2021 +0300

    Similar sentences (with different 'direct' synonyms property) processing fixes.
---
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 38 +++++++---------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 60b873b..47fffea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -698,37 +698,21 @@ object NCSentenceManager extends NCService {
 
                 // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                 // `Direct` sentences have higher priority.
-                case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
-                case class Value(sentence: NCNlpSentence, directCount: Int)
+                type Key = Seq[Map[String, JSerializable]]
+                case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
 
-                val m = mutable.HashMap.empty[Key, Value]
-
-                seqSens.par.map(sen ⇒ {
+                def mkHolder(sen: NCNlpSentence): Holder = {
                     val notes = sen.flatten
 
-                    val sysNotes = notes.filter(_.isSystem)
-                    val nlpNotes = notes.filter(_.isNlp)
-                    val userNotes = notes.filter(_.isUser)
-
-                    def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
-                        seq.map(p ⇒
-                            // We have to delete some keys to have possibility to compare sentences.
-                            p.clone().filter(_._1 != "direct")
-                        )
-
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
-                }).seq.
-                    foreach { case (key, sen, directCnt) ⇒
-                        m.get(key) match {
-                            case Some(v) ⇒
-                                // Best sentence is sentence with `direct` synonyms.
-                                if (v.directCount > directCnt)
-                                    m += key → Value(sen, directCnt)
-                            case None ⇒ m += key → Value(sen, directCnt)
-                        }
-                    }
+                    Holder(
+                        // Drop the 'direct' key so that sentences differing only in that flag produce equal keys.
+                        notes.map(_.clone().filter { case (name, _) ⇒ name != "direct" }),
+                        sen,
+                        notes.filter(_.isNlp).map(p ⇒ if (p.isDirect) 0 else 1).sum
+                    )
+                }
 
-                m.values.map(_.sentence).toSeq
+                seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) ⇒ seq.minBy(_.factor).sentence }.toSeq
             }
             else
                 collapse0(sen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)