You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/12 14:39:16 UTC

[incubator-nlpcraft] branch NLPCRAFT-268 created (now 8af9171)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-268
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


      at 8af9171  Similar sentences (with different 'direct' synonyms property) processing fixes.

This branch includes the following new commits:

     new 8af9171  Similar sentences (with different 'direct' synonyms property) processing fixes.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[incubator-nlpcraft] 01/01: Similar sentences (with different 'direct' synonyms property) processing fixes.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-268
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 8af9171647bca771d48f68f943d57658900f6d0c
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Mar 12 17:39:03 2021 +0300

    Similar sentences (with different 'direct' synonyms property) processing fixes.
---
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 38 +++++++---------------
 1 file changed, 11 insertions(+), 27 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 60b873b..47fffea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -698,37 +698,21 @@ object NCSentenceManager extends NCService {
 
                 // It removes sentences which have only one difference - 'direct' flag of their user tokens.
                 // `Direct` sentences have higher priority.
-                case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
-                case class Value(sentence: NCNlpSentence, directCount: Int)
+                type Key = Seq[Map[String, JSerializable]]
+                case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
 
-                val m = mutable.HashMap.empty[Key, Value]
-
-                seqSens.par.map(sen ⇒ {
+                def mkHolder(sen: NCNlpSentence): Holder = {
                     val notes = sen.flatten
 
-                    val sysNotes = notes.filter(_.isSystem)
-                    val nlpNotes = notes.filter(_.isNlp)
-                    val userNotes = notes.filter(_.isUser)
-
-                    def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
-                        seq.map(p ⇒
-                            // We have to delete some keys to have possibility to compare sentences.
-                            p.clone().filter(_._1 != "direct")
-                        )
-
-                    (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
-                }).seq.
-                    foreach { case (key, sen, directCnt) ⇒
-                        m.get(key) match {
-                            case Some(v) ⇒
-                                // Best sentence is sentence with `direct` synonyms.
-                                if (v.directCount > directCnt)
-                                    m += key → Value(sen, directCnt)
-                            case None ⇒ m += key → Value(sen, directCnt)
-                        }
-                    }
+                    Holder(
+                        // The 'direct' key is dropped so that sentences differing only by that flag produce equal keys.
+                        notes.map(_.clone().filter { case (name, _) ⇒ name != "direct" }),
+                        sen,
+                        notes.filter(_.isNlp).map(p ⇒ if (p.isDirect) 0 else 1).sum
+                    )
+                }
 
-                m.values.map(_.sentence).toSeq
+                seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) ⇒ seq.minBy(_.factor).sentence }.toSeq
             }
             else
                 collapse0(sen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)