You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/03/12 14:39:17 UTC
[incubator-nlpcraft] 01/01: Similar sentences (with different 'direct' synonyms property) processing fixes.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-268
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 8af9171647bca771d48f68f943d57658900f6d0c
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Fri Mar 12 17:39:03 2021 +0300
Similar sentences (with different 'direct' synonyms property) processing fixes.
---
.../probe/mgrs/sentence/NCSentenceManager.scala | 38 +++++++---------------
1 file changed, 11 insertions(+), 27 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 60b873b..47fffea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -698,37 +698,21 @@ object NCSentenceManager extends NCService {
// It removes sentences which have only one difference - 'direct' flag of their user tokens.
// `Direct` sentences have higher priority.
- case class Key(sysNotes: Seq[Map[String, JSerializable]], userNotes: Seq[Map[String, JSerializable]])
- case class Value(sentence: NCNlpSentence, directCount: Int)
+ type Key = Seq[Map[String, JSerializable]]
+ case class Holder(key: Key, sentence: NCNlpSentence, factor: Int)
- val m = mutable.HashMap.empty[Key, Value]
-
- seqSens.par.map(sen ⇒ {
+ def mkHolder(sen: NCNlpSentence): Holder = {
val notes = sen.flatten
- val sysNotes = notes.filter(_.isSystem)
- val nlpNotes = notes.filter(_.isNlp)
- val userNotes = notes.filter(_.isUser)
-
- def get(seq: Seq[NCNlpSentenceNote]): Seq[Map[String, JSerializable]] =
- seq.map(p ⇒
- // We have to delete some keys to have possibility to compare sentences.
- p.clone().filter(_._1 != "direct")
- )
-
- (Key(get(sysNotes), get(userNotes)), sen, nlpNotes.map(p ⇒ if (p.isDirect) 0 else 1).sum)
- }).seq.
- foreach { case (key, sen, directCnt) ⇒
- m.get(key) match {
- case Some(v) ⇒
- // Best sentence is sentence with `direct` synonyms.
- if (v.directCount > directCnt)
- m += key → Value(sen, directCnt)
- case None ⇒ m += key → Value(sen, directCnt)
- }
- }
+ Holder(
+ // We have to delete some keys to have possibility to compare sentences.
+ notes.map(_.clone().filter { case (name, _) ⇒ name != "direct" }),
+ sen,
+ notes.filter(_.isNlp).map(p ⇒ if (p.isDirect) 0 else 1).sum
+ )
+ }
- m.values.map(_.sentence).toSeq
+ seqSens.par.map(mkHolder).seq.groupBy(_.key).map { case (_, seq) ⇒ seq.minBy(_.factor).sentence }.toSeq
}
else
collapse0(sen).flatMap(p ⇒ Option(Seq(p))).getOrElse(Seq.empty)