You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2021/02/20 13:50:01 UTC
[incubator-nlpcraft] branch NLPCRAFT-246 updated: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-246
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-246 by this push:
new a71aad3 WIP.
a71aad3 is described below
commit a71aad3908f71041949f0725dfe26776e5c6a4ec
Author: Sergey Kamov <sk...@gmail.com>
AuthorDate: Sat Feb 20 16:11:09 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 158 ++++++++++++++-------
.../model/NCEnricherNestedModelSpec3.scala | 4 +-
2 files changed, 106 insertions(+), 56 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index c479308..23eeff6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -34,6 +34,54 @@ import scala.language.implicitConversions
object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] = x.tokens
+ private case class NoteLink(note: String, indexes: Seq[Int])
+ private case class PartKey(id: String, start: Int, end: Int) {
+ private def in(i: Int): Boolean = i >= start && i <= end
+ def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
+ }
+
+ private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
+ val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
+
+ for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
+ noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala)
+
+ for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
+ def add(noteName: String, idxsName: String): Unit = {
+ val names = n(noteName).asInstanceOf[JList[String]]
+ val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
+
+ require(names.size() == idxsSeq.size())
+
+ noteLinks ++=
+ (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
+ yield NoteLink(name, idxs)
+ )
+ }
+
+ if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
+ if (n.contains("bynotes")) add("bynotes", "byindexes")
+ }
+
+ noteLinks
+ }
+
+ private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
+ notes.
+ filter(_.isUser).
+ flatMap(n ⇒ {
+ val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
+
+ optList
+ }).flatMap(_.asScala).
+ map(map ⇒
+ PartKey(
+ map.get("id").asInstanceOf[String],
+ map.get("startcharindex").asInstanceOf[Int],
+ map.get("endcharindex").asInstanceOf[Int]
+ )
+ ).distinct
+
/**
*
* @param ns
@@ -509,6 +557,20 @@ class NCNlpSentence(
private def calcHash(): Int =
Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+ private def addDeleted(sen: NCNlpSentence, dels: Iterable[NCNlpSentenceNote]): Unit =
+ sen.deletedNotes ++= dels.map(n ⇒ {
+ val savedDelNote = n.clone()
+ val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
+
+ val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
+
+ // Deleted note's tokens should contain only nlp data and deleted notes.
+ for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+ savedDelTok.remove(mainNote)
+
+ savedDelNote → savedDelToks
+ })
+
// Deep copy.
override def clone(): NCNlpSentence =
new NCNlpSentence(
@@ -559,45 +621,9 @@ class NCNlpSentence(
if (!mdl.getAbstractTokens.isEmpty) {
val notes = ns.flatten
- case class Key(id: String, start: Int, end: Int) {
- private def in(i: Int): Boolean = i >= start && i <= end
- def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
- }
-
- val keys: Seq[Key] =
- notes.filter(_.isUser).flatMap(n ⇒ {
- val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
-
- optList
- }).flatMap(_.asScala).map(map ⇒ Key(
- map.get("id").asInstanceOf[String],
- map.get("startcharindex").asInstanceOf[Int],
- map.get("endcharindex").asInstanceOf[Int])
- ).distinct
-
- case class NoteLink(note: String, indexes: Seq[Int])
-
- val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
-
- for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
- noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala)
- for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
- def add(noteName: String, idxsName: String): Unit = {
- val names = n(noteName).asInstanceOf[JList[String]]
- val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
-
- require(names.size() == idxsSeq.size())
-
- noteLinks ++=
- (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
- yield NoteLink(name, idxs)
- )
- }
-
- if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
- if (n.contains("bynotes")) add("bynotes", "byindexes")
- }
+ val keys = getPartKeys(notes :_*)
+ val noteLinks = getLinks(notes)
notes.filter(n ⇒ {
val noteToks = ns.tokens.filter(_.contains(n))
@@ -657,11 +683,42 @@ class NCNlpSentence(
redundant.foreach(this.removeNote)
- val delCombs: Seq[NCNlpSentenceNote] =
+ var delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
distinct
+ // Optimization. Deletes all wholly swallowed notes.
+ val links = getLinks(this.flatten)
+
+ val swallowed =
+ delCombs.
+ filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes))).
+ filter(getPartKeys(_).isEmpty).
+ flatMap(n ⇒ {
+ val owners =
+ delCombs.
+ filter(_ != n).
+ flatMap(n1 ⇒
+ if (getPartKeys(n1).contains(
+ PartKey(
+ n.noteType,
+ this(n.tokenFrom).startCharIndex,
+ this(n.tokenTo).endCharIndex)
+ )
+ )
+ Some(n1)
+ else
+ None
+ )
+
+ if (owners.exists(_.wordIndexes == n.wordIndexes)) Some(n) else None
+ })
+
+ delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
+ addDeleted(this, swallowed)
+ swallowed.foreach(this.removeNote)
+
val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
groupBy { case (idx, _) ⇒ idx }.
@@ -678,7 +735,13 @@ class NCNlpSentence(
(minDelSize to delCombs.size).
flatMap(i ⇒
delCombs.combinations(i).
- filter(delComb ⇒ !toksByIdx.exists(_.count(note ⇒ !delComb.contains(note)) > 1))
+ filter(delComb ⇒
+ !toksByIdx.exists(
+ rec ⇒
+ rec.size - delCombs.size <= 1 &&
+ rec.count(note ⇒ !delComb.contains(note)) > 1
+ )
+ )
).
sortBy(_.size).
map(_.toSet).
@@ -688,20 +751,7 @@ class NCNlpSentence(
val nsClone = this.clone()
// Saves deleted notes for sentence and their tokens.
- nsClone.deletedNotes ++= delComb.map(n ⇒ {
- val savedDelNote = n.clone()
- val savedDelToks = n.tokenIndexes.map(idx ⇒ nsClone(idx).clone())
-
- val mainNotes =
- savedDelToks.flatten.filter(n ⇒ n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
- // Deleted note's tokens should contains only nlp data and deleted notes.
- for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
- savedDelTok.remove(mainNote)
-
- savedDelNote → savedDelToks
- })
-
+ addDeleted(nsClone, delComb)
delComb.foreach(nsClone.removeNote)
// Has overlapped notes for some tokens.
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
index 7fef01d..e1fedca 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
@@ -39,7 +39,7 @@ class NCNestedTestModel3 extends NCModelAdapter(
override def getAbstractTokens: util.Set[String] = Set("e1").asJava
override def getEnabledBuiltInTokens: util.Set[String] = Set.empty[String].asJava
- @NCIntent("intent=onE2 term(t1)={id == 'e2'}[11, 100]")
+ @NCIntent("intent=onE2 term(t1)={id == 'e2'}[12, 100]")
def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
}
@@ -54,7 +54,7 @@ class NCEnricherNestedModelSpec3 extends NCTestContext {
val t = System.currentTimeMillis()
- checkIntent("a " * 11, "onE2")
+ checkIntent("a " * 12, "onE2")
println(s"Passed: ${System.currentTimeMillis() - t}")
}