You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2020/09/28 03:45:38 UTC
[incubator-nlpcraft] 02/03: WIP.
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 1c1bfad465b5c6d82daed99d25e51ba631d6ba68
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Sun Sep 27 15:35:15 2020 -0700
WIP.
---
.../dictionary/NCDictionaryEnricher.scala | 5 +++-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 5 +++-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 5 +++-
.../enrichers/relation/NCRelationEnricher.scala | 5 +++-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 5 +++-
.../enrichers/stopword/NCStopWordEnricher.scala | 2 ++
.../suspicious/NCSuspiciousNounsEnricher.scala | 5 +++-
.../nlp/enrichers/basenlp/NCBaseNlpEnricher.scala | 2 ++
.../coordinate/NCCoordinatesEnricher.scala | 31 ++++++++++++----------
.../server/nlp/enrichers/date/NCDateEnricher.scala | 2 ++
.../server/nlp/enrichers/geo/NCGeoEnricher.scala | 5 +++-
.../nlp/enrichers/numeric/NCNumericEnricher.scala | 5 +++-
.../nlp/enrichers/quote/NCQuoteEnricher.scala | 4 ++-
.../enrichers/stopword/NCStopWordEnricher.scala | 4 ++-
14 files changed, 61 insertions(+), 24 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
index 2be0859..325c90c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
@@ -62,7 +62,9 @@ object NCDictionaryEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
@@ -82,4 +84,5 @@ object NCDictionaryEnricher extends NCProbeEnricher {
)
})
}
+ }
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 2c1f713..43c3748 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -246,7 +246,9 @@ object NCLimitEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
@@ -281,6 +283,7 @@ object NCLimitEnricher extends NCProbeEnricher {
case None ⇒ // No-op.
}
}
+ }
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 03594d1..e2e2265 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -311,7 +311,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
def isComplex(mdl: NCProbeModel): Boolean = mdl.synonymsDsl.nonEmpty || !mdl.model.getParsers.isEmpty
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
@@ -518,4 +520,5 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
parser.onDiscard()
}
}
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index ff4475c..5a52f8c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -147,7 +147,9 @@ object NCRelationEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
@@ -179,6 +181,7 @@ object NCRelationEnricher extends NCProbeEnricher {
case None ⇒ // No-op.
}
}
+ }
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index f549dd5..d177c10 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -437,7 +437,9 @@ object NCSortEnricher extends NCProbeEnricher {
toks.length == toks2.length || toks.count(isImportant) == toks2.count(isImportant)
}
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
@@ -506,6 +508,7 @@ object NCSortEnricher extends NCProbeEnricher {
}
}
}
+ }
/**
*
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 255d91b..b386978 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -218,6 +218,8 @@ object NCStopWordEnricher extends NCProbeEnricher {
@throws[NCE]
override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
def mark(stems: Set[String], f: Boolean): Unit =
ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "stopWord" → f))
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
index fa9a3a2..f212687 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
@@ -51,11 +51,14 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher {
}
@throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+ override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
ns.filter(t ⇒ mdl.suspWordsStems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "suspNoun" → true))
}
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
index cc64123..7bbe5ac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
@@ -102,6 +102,8 @@ object NCBaseNlpEnricher extends NCServerEnricher {
*/
@throws[NCE]
override def enrich(ns: NCNlpSentence, parent: Span = null) {
+ require(isStarted)
+
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
// This must be 1st enricher in the pipeline.
assume(ns.isEmpty)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
index 2543757..ac82456 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
@@ -181,31 +181,33 @@ object NCCoordinatesEnricher extends NCServerEnricher {
*/
private def hasStem(toks: Seq[NCNlpSentenceToken], stems: Seq[String]): Boolean = toks.exists(t ⇒ stems.contains(t.stem))
- override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+ override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
val nums = NCNumericManager.find(ns).sortBy(_.tokens.head.index)
-
+
if (nums.size >= 2) {
val markers = mutable.Buffer.empty[Seq[NCNlpSentenceToken]]
-
+
def areSuitableTokens(toks: Seq[NCNlpSentenceToken]): Boolean =
toks.forall(t ⇒ !t.isQuoted && !t.isBracketed) && !markers.exists(_.exists(t ⇒ toks.contains(t)))
-
+
for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(toks) && MARKERS_STEMS.contains(toks.map(_.stem).mkString(" ")))
markers += toks
-
+
val allMarkers = markers.flatten
val buf = mutable.Buffer.empty[NCNlpSentenceToken]
-
+
for (pair ← nums.sliding(2) if !buf.exists(t ⇒ pair.flatMap(_.tokens).contains(t))) {
var lat = pair.head
var lon = pair.last
-
+
val between = ns.slice(lat.tokens.last.index + 1, lon.tokens.head.index)
val before = getBefore(ns, ns.take(lat.tokens.head.index), markers)
-
+
val after = getAfter(ns, ns.drop(lon.tokens.last.index + 1), markers)
-
+
if (hasStem(before, lonStems) && hasStem(between, latStems) ||
hasStem(between, lonStems) && hasStem(after, latStems) ||
!inRange(lat, 90) && inRange(lat, 180)
@@ -216,25 +218,25 @@ object NCCoordinatesEnricher extends NCServerEnricher {
}
if (inRange(lat, 90) && inRange(lon, 180) && (markers.nonEmpty || similar2Coordinates(lat, lon))) {
val normBetween = between.diff(allMarkers)
-
+
if (normBetween.isEmpty ||
normBetween.forall(
t ⇒ t.isEmpty || t.pos == "IN" || SEPS.contains(t.normText) || EQUALS.contains(t.normText))
) {
val extra = (before ++ after ++ between).sortBy(_.index)
-
+
if (markers.exists(extra.containsSlice) || similar2Coordinates(lat, lon)) {
val toks = (lat.tokens ++ lon.tokens ++ extra ++ markers.flatten).distinct.sortBy(_.index)
-
+
val note = NCNlpSentenceNote(
toks.map(_.index),
"nlpcraft:coordinate",
"latitude" → lat.value,
"longitude" → lon.value
)
-
+
toks.foreach(_.add(note))
-
+
buf ++= toks
}
}
@@ -242,4 +244,5 @@ object NCCoordinatesEnricher extends NCServerEnricher {
}
}
}
+ }
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index eb002a5..2070ef9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -174,6 +174,8 @@ object NCDateEnricher extends NCServerEnricher {
*/
@throws[NCE]
override def enrich(ns: Sentence, parent: Span = null) {
+ require(isStarted)
+
// This stage must not be 1st enrichment stage.
assume(ns.nonEmpty)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
index 37bf87f..8162f92 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
@@ -140,7 +140,9 @@ object NCGeoEnricher extends NCServerEnricher {
* @throws NCE
*/
@throws[NCE]
- override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+ override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
// This stage must not be 1st enrichment stage.
assume(ns.nonEmpty)
@@ -294,6 +296,7 @@ object NCGeoEnricher extends NCServerEnricher {
collapse(ns)
}
+ }
private def getValue(note: NCNlpSentenceNote, key: String): String = note(key).asInstanceOf[String]
private def getValueOpt(note: NCNlpSentenceNote, key: String): Option[String] = note.get(key) match {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index a884c32..18a25fe 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -239,7 +239,9 @@ object NCNumericEnricher extends NCServerEnricher {
* @throws NCE
*/
@throws[NCE]
- override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+ override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
val nums = NCNumericManager.find(ns)
@@ -438,4 +440,5 @@ object NCNumericEnricher extends NCServerEnricher {
}
}
+ }
}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
index d323785..532aa15 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
@@ -56,7 +56,9 @@ object NCQuoteEnricher extends NCServerEnricher {
* @throws NCE
*/
@throws[NCE]
- override def enrich(ns: NCNlpSentence, parent: Span = null) {
+ override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+ require(isStarted)
+
startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
// Clone input sentence.
val copy = ns.clone()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
index d8a1353..71dafc7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -531,7 +531,9 @@ object NCStopWordEnricher extends NCServerEnricher {
}
@throws[NCE]
- override def enrich(ns: NCNlpSentence, parent: Span = null) {
+ override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+ require(isStarted)
+
// This stage must not be 1st enrichment stage.
assume(ns.nonEmpty)