You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/03/14 07:00:33 UTC
[incubator-nlpcraft] 01/13: WIP.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 17776cd96fe6ace895839b7b38c7737c7f0c509c
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Wed Mar 11 18:34:15 2020 +0300
WIP.
---
.../nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala | 21 ++-
.../aggregation/NCAggregationEnricher.scala | 2 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 2 +-
.../enrichers/relation/NCRelationEnricher.scala | 2 +-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 157 +++++++++++----------
5 files changed, 103 insertions(+), 81 deletions(-)
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
index 0c6ca9e..c632c2e 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/NCProbeEnricher.scala
@@ -27,6 +27,8 @@ import org.nlpcraft.probe.mgrs.NCModelDecorator
import scala.collection.{Map, Seq}
import scala.language.implicitConversions
+import scala.collection.JavaConverters._
+
/**
* Base class for NLP enricher.
*/
@@ -75,12 +77,25 @@ abstract class NCProbeEnricher extends NCService with LazyLogging {
/**
*
* @param typ
- * @param refNote
+ * @param refNoteName
+ * @param refNoteVal
+ * @param matched
+ */
+ protected def hasReference(typ: String, refNoteName: String, refNoteVal: String, matched: Seq[NCNlpSentenceToken]): Boolean =
+ matched.forall(t ⇒
+ t.isTypeOf(typ) && t.getNotes(typ).exists(n ⇒ n(refNoteName).asInstanceOf[String] == refNoteVal)
+ )
+
+ /**
+ *
+ * @param typ
+ * @param refNoteName
+ * @param refNoteVals
* @param matched
*/
- protected def isReference(typ: String, refNote: String, matched: Seq[NCNlpSentenceToken]): Boolean =
+ protected def hasReferences(typ: String, refNoteName: String, refNoteVals: Seq[String], matched: Seq[NCNlpSentenceToken]): Boolean =
matched.forall(t ⇒
- t.isTypeOf(typ) && t.getNotes(typ).exists(n ⇒ n("note").asInstanceOf[String] == refNote)
+ t.isTypeOf(typ) && t.getNotes(typ).exists(n ⇒ n(refNoteName).asInstanceOf[java.util.List[String]].asScala == refNoteVals)
)
/**
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
index 2195363..6fa6a2a 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/aggregation/NCAggregationEnricher.scala
@@ -99,7 +99,7 @@ object NCAggregationEnricher extends NCProbeEnricher {
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
tryToMatch(toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !isReference(TOK_ID, refNote, m.matched)) {
+ for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
val note = NCNlpSentenceNote(
m.matched.map(_.index),
TOK_ID,
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index b57fcf3..bcde957 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -210,7 +210,7 @@ object NCLimitEnricher extends NCProbeEnricher {
for (toks ← ns.tokenMixWithStopWords().sortBy(p ⇒ (-p.size, -p.head.index)) if areSuitableTokens(buf, toks))
tryToMatch(numsMap, groupsMap, toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !isReference(TOK_ID, refNote, m.matched)) {
+ for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, m.matched)) {
val note = NCNlpSentenceNote(
m.matched.map(_.index),
TOK_ID,
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 0613a51..2284766 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -129,7 +129,7 @@ object NCRelationEnricher extends NCProbeEnricher {
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
tryToMatch(toks) match {
case Some(m) ⇒
- for (refNote ← m.refNotes if !isReference(TOK_ID, refNote, Seq(m.matched.head))) {
+ for (refNote ← m.refNotes if !hasReference(TOK_ID, "note", refNote, Seq(m.matched.head))) {
val note = NCNlpSentenceNote(
Seq(m.matchedHead.index),
TOK_ID,
diff --git a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index 7e9de2c..8678c7d 100644
--- a/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -28,6 +28,7 @@ import org.nlpcraft.probe.mgrs.NCModelDecorator
import org.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import scala.collection.JavaConverters._
+import scala.collection.mutable.ArrayBuffer
import scala.collection.{Map, Seq, mutable}
/**
@@ -103,10 +104,13 @@ object NCSortEnricher extends NCProbeEnricher {
asc: Option[Boolean],
main: Seq[NCNlpSentenceToken],
stop: Seq[NCNlpSentenceToken],
- subj: Seq[Seq[NoteData]],
- by: Seq[Seq[NoteData]]
+ subjSeq: Seq[Seq[NoteData]],
+ bySeq: Seq[Seq[NoteData]]
) {
- lazy val all = main ++ stop
+ require(main.nonEmpty)
+ require(subjSeq.nonEmpty)
+
+ lazy val all: Seq[NCNlpSentenceToken] = main ++ stop
}
override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ ⇒
@@ -121,38 +125,32 @@ object NCSortEnricher extends NCProbeEnricher {
// [Token(A, B), Token(A), Token(C, D), Token(C, D, X), Token(Z)] ⇒
// [[A (0, 1), C (2, 3), Z (4)], [A (0, 1), D (2, 3), Z (4)]]
private def split(toks: Seq[NCNlpSentenceToken]): Seq[Seq[NoteData]] = {
- val all = toks.
+ val all: Seq[NoteData] = toks.
flatten.
- filter(n ⇒ !n.isNlp).
+ filter(!_.isNlp).
map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
sortBy(_.indexes.head)
- val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
- val used = mutable.ArrayBuffer.empty[NoteData]
+ if (all.nonEmpty) {
+ val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
- def go(seq: mutable.ArrayBuffer[NoteData], nd: NoteData): Boolean =
- if (!used.contains(nd)) {
- if (seq.isEmpty) {
- if (nd.indexes.head == 0) {
- seq += nd
- used += nd
+ def go(nd: NoteData, seq: mutable.ArrayBuffer[NoteData] = mutable.ArrayBuffer.empty[NoteData]): Unit = {
+ seq += nd
- all.find(nd ⇒ !used.contains(nd)) match {
- case Some(next) ⇒ go(seq, next)
- case None ⇒ false
- }
- }
- else
- false
- }
- else {
- false
- }
+ all.
+ filter(p ⇒ p.indexes.head == nd.indexes.last + 1).
+ foreach(go(_, mutable.ArrayBuffer.empty[NoteData] ++ seq.clone()))
+
+ if (seq.nonEmpty && seq.head.indexes.head == toks.head.index && seq.last.indexes.last == toks.last.index)
+ res += seq
}
- else
- false
- res
+ go(all.head)
+
+ res
+ }
+ else
+ Seq.empty
}
private def tryToMatch(toks: Seq[NCNlpSentenceToken]): Option[Match] = {
@@ -200,6 +198,7 @@ object NCSortEnricher extends NCProbeEnricher {
case None ⇒ None
}
+
hOpt match {
case Some(h) ⇒
val others = toks.filter(t ⇒ !h.all.contains(t))
@@ -222,19 +221,16 @@ object NCSortEnricher extends NCProbeEnricher {
require(subj.nonEmpty)
- val asc =
- h.order match {
- case Some(order) ⇒ Some(ORDER(order.synonymIndex)._2)
- case None ⇒ None
- }
-
Some(
Match(
- asc = asc,
+ asc = h.order match {
+ case Some(order) ⇒ Some(ORDER(order.synonymIndex)._2)
+ case None ⇒ None
+ },
main = h.sort.tokens,
stop = h.byTokens ++ h.orderTokens,
- subj = split(subj),
- by = split(by)
+ subjSeq = split(subj),
+ bySeq = split(by)
)
)
}
@@ -244,9 +240,9 @@ object NCSortEnricher extends NCProbeEnricher {
}
}
-// def suitable(m: Match, notes: Seq[String], refName: String): Boolean =
-// notes.forall(note ⇒ !isReference(TOK_ID, refName, note, m.all))
-//
+ // TODO:
+ private def suitable(m: Match, notes: Seq[String], refName: String): Boolean =
+ !hasReferences(TOK_ID, refName, notes, m.main)
override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span): Boolean =
startScopedSpan("enrich", parent,
@@ -256,42 +252,53 @@ object NCSortEnricher extends NCProbeEnricher {
val buf = mutable.Buffer.empty[Set[NCNlpSentenceToken]]
var changed: Boolean = false
-// for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
-// tryToMatch(toks) match {
-// case Some(m)
-//// if suitable(m, m.subj.map(_.note), "subjNotes") &&
-//// (m.by.isEmpty || suitable(m, m.by.map(_.note), "byNotes")) ⇒
-// ⇒
-// val params = mutable.ArrayBuffer.empty[(String, Any)]
-//
-// m.asc match {
-// case Some(asc) ⇒ params += "asc" → asc
-// case None ⇒ // No-op.
-// }
-//
-// def addNotes(seq: Seq[NoteData], notesName: String, idxsName: String): Unit = {
-// params += notesName → seq.map(_.note).asJava
-// params += idxsName → seq.map(_.indexes.asJava).asJava
-// }
-//
-// addNotes(m.subj, "subjNotes", "subjIndexes")
-//
-// if (m.by.nonEmpty)
-// addNotes(m.by, "byNotes", "byIndexes")
-//
-// val note = NCNlpSentenceNote(m.main.map(_.index), TOK_ID, params :_*)
-//
-// m.main.foreach(_.add(note))
-// m.stop.foreach(_.addStopReason(note))
-//
-// changed = true
-//
-// case None ⇒ // No-op.
-//
-// if (changed)
-// buf += toks.toSet
-// }
+ for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(buf, toks))
+ tryToMatch(toks) match {
+ case Some(m) ⇒
+ for (subj ← m.subjSeq if suitable(m, subj.map(_.note), "subjNotes")) {
+ def addNotes(
+ params: ArrayBuffer[(String, Any)],
+ seq: Seq[NoteData],
+ notesName: String,
+ idxsName: String
+ ): ArrayBuffer[(String, Any)] = {
+ params += notesName → seq.map(_.note).asJava
+ params += idxsName → seq.map(_.indexes.asJava).asJava
+
+ params
+ }
+
+ def mkParams(): ArrayBuffer[(String, Any)] = {
+ val params = mutable.ArrayBuffer.empty[(String, Any)]
+
+ if (m.asc.isDefined)
+ params += "asc" → m.asc.get
+
+ addNotes(params, subj, "subjNotes", "subjIndexes")
+ }
+
+ def mkNote(params: ArrayBuffer[(String, Any)]): Unit = {
+ val note = NCNlpSentenceNote(m.main.map(_.index), TOK_ID, params:_*)
+
+ m.main.foreach(_.add(note))
+ m.stop.foreach(_.addStopReason(note))
+
+ changed = true
+ }
+
+ if (m.bySeq.nonEmpty)
+ for (by ← m.bySeq if suitable(m, by.map(_.note), "byNotes"))
+ mkNote(addNotes(mkParams(), by, "byNotes", "byIndexes"))
+ else
+ mkNote(mkParams())
+ }
+
+ case None ⇒ // No-op.
+
+ if (changed)
+ buf += toks.toSet
+ }
changed
}
-}
+}
\ No newline at end of file