You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/04/05 10:10:50 UTC
[incubator-nlpcraft] branch NLPCRAFT-30 updated: SortEnricher bugfix.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
new 5ee71a2 SortEnricher bugfix.
5ee71a2 is described below
commit 5ee71a2a7074500d79e820d99cc149863e0bb5e9
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Sun Apr 5 13:10:43 2020 +0300
SortEnricher bugfix.
---
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 91 +++++++++++-----------
1 file changed, 47 insertions(+), 44 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index a3be897..3a9661b 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -150,60 +150,56 @@ object NCSortEnricher extends NCProbeEnricher {
*
* @param toks
*/
- private def split(toks: Seq[NCNlpSentenceToken], nullable: Boolean): Seq[Seq[NoteData]] = {
+ private def split(toks: Seq[NCNlpSentenceToken]): Seq[Seq[NoteData]] = {
+ require(toks.nonEmpty)
+
+ val min = toks.head.index
+ val max = toks.last.index
+
val all =
toks.flatten.
- filter(!_.isNlp).map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
+ filter(!_.isNlp).
+ filter(n ⇒ n.tokenIndexes.head >= min && n.tokenIndexes.last <= max).
+ map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
sortBy(_.indexes.head).distinct
- val res =
- if (all.nonEmpty) {
- val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
-
- /**
- * Returns flag which indicates are token contiguous or not.
- *
- * @param tok1Idx First token index.
- * @param tok2Idx Second token index.
- */
- def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
- val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
+ if (all.nonEmpty) {
+ val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
- between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
- }
-
- val min = toks.dropWhile(_.isNlp).head.index
- val max = toks.reverse.dropWhile(_.isNlp).head.index
+ /**
+ * Returns flag which indicates are token contiguous or not.
+ *
+ * @param tok1Idx First token index.
+ * @param tok2Idx Second token index.
+ */
+ def contiguous(tok1Idx: Int, tok2Idx: Int): Boolean = {
+ val between = toks.filter(t ⇒ t.index > tok1Idx && t.index < tok2Idx)
- require(min <= max)
+ between.isEmpty || between.forall(p ⇒ p.isStopWord || p.stem == STEM_AND)
+ }
- def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
- seq += nd
+ val minIdx = toks.dropWhile(_.isNlp).head.index
+ val maxIdx = toks.reverse.dropWhile(_.isNlp).head.index
- all.
- filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
- foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
+ require(minIdx <= maxIdx)
- if (seq.nonEmpty && seq.head.indexes.head == min && seq.last.indexes.last == max)
- res += seq
- }
+ def fill(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
+ seq += nd
- all.filter(_.indexes.head == min).foreach(p ⇒ fill(p))
+ all.
+ filter(p ⇒ nd.indexes.last < p.indexes.head && contiguous(nd.indexes.last, p.indexes.head)).
+ foreach(fill(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
- res
+ if (seq.nonEmpty && seq.head.indexes.head == minIdx && seq.last.indexes.last == maxIdx)
+ res += seq
}
- else
- Seq.empty
- if (res.isEmpty && !nullable)
- throw new AssertionError(s"Invalid null result " +
- s"[tokens=[${toks.map(_.origText).mkString(", ")}]" +
- s", tokensIndexes=[${toks.map(_.index).mkString(", ")}]" +
- s", allData=[${all.mkString(", ")}]" +
- s"]"
- )
+ all.filter(_.indexes.head == minIdx).foreach(p ⇒ fill(p))
- res
+ res
+ }
+ else
+ Seq.empty
}
/**
@@ -306,13 +302,20 @@ object NCSortEnricher extends NCProbeEnricher {
else
(others.filter(_.index < sepIdxs.head), others.filter(_.index > sepIdxs.last))
+ val notes = subj.flatten
+
require(subj.nonEmpty)
- val subjSeq = split(subj, nullable = false)
- val bySeq = split(by, nullable = true)
- val asc = h.order.flatMap(order ⇒ Some(ORDER(order.synonymIndex)._2))
+ val subjSeq = split(subj)
+
+ if (subjSeq.nonEmpty) {
+ val bySeq = if (by.isEmpty) Seq.empty else split(by)
+ val asc = h.order.flatMap(order ⇒ Some(ORDER(order.synonymIndex)._2))
- Some(Match(asc, main = h.sort.tokens, stop = h.byTokens ++ h.orderTokens, subjSeq, bySeq))
+ Some(Match(asc, main = h.sort.tokens, stop = h.byTokens ++ h.orderTokens, subjSeq, bySeq))
+ }
+ else
+ None
}
else
None