You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2020/09/28 03:45:38 UTC

[incubator-nlpcraft] 02/03: WIP.

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 1c1bfad465b5c6d82daed99d25e51ba631d6ba68
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Sun Sep 27 15:35:15 2020 -0700

    WIP.
---
 .../dictionary/NCDictionaryEnricher.scala          |  5 +++-
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala |  5 +++-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  5 +++-
 .../enrichers/relation/NCRelationEnricher.scala    |  5 +++-
 .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala   |  5 +++-
 .../enrichers/stopword/NCStopWordEnricher.scala    |  2 ++
 .../suspicious/NCSuspiciousNounsEnricher.scala     |  5 +++-
 .../nlp/enrichers/basenlp/NCBaseNlpEnricher.scala  |  2 ++
 .../coordinate/NCCoordinatesEnricher.scala         | 31 ++++++++++++----------
 .../server/nlp/enrichers/date/NCDateEnricher.scala |  2 ++
 .../server/nlp/enrichers/geo/NCGeoEnricher.scala   |  5 +++-
 .../nlp/enrichers/numeric/NCNumericEnricher.scala  |  5 +++-
 .../nlp/enrichers/quote/NCQuoteEnricher.scala      |  4 ++-
 .../enrichers/stopword/NCStopWordEnricher.scala    |  4 ++-
 14 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
index 2be0859..325c90c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
@@ -62,7 +62,9 @@ object NCDictionaryEnricher extends NCProbeEnricher {
     }
     
     @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
@@ -82,4 +84,5 @@ object NCDictionaryEnricher extends NCProbeEnricher {
                 )
             })
         }
+    }
 }
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 2c1f713..43c3748 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -246,7 +246,9 @@ object NCLimitEnricher extends NCProbeEnricher {
     }
 
     @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
@@ -281,6 +283,7 @@ object NCLimitEnricher extends NCProbeEnricher {
                     case None ⇒ // No-op.
                 }
         }
+    }
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 03594d1..e2e2265 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -311,7 +311,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     def isComplex(mdl: NCProbeModel): Boolean = mdl.synonymsDsl.nonEmpty || !mdl.model.getParsers.isEmpty
 
     @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
@@ -518,4 +520,5 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 parser.onDiscard()
             }
         }
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index ff4475c..5a52f8c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -147,7 +147,9 @@ object NCRelationEnricher extends NCProbeEnricher {
     }
 
     @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
@@ -179,6 +181,7 @@ object NCRelationEnricher extends NCProbeEnricher {
                     case None ⇒ // No-op.
                 }
         }
+    }
 
     /**
       *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index f549dd5..d177c10 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -437,7 +437,9 @@ object NCSortEnricher extends NCProbeEnricher {
         toks.length == toks2.length || toks.count(isImportant) == toks2.count(isImportant)
     }
 
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
@@ -506,6 +508,7 @@ object NCSortEnricher extends NCProbeEnricher {
                 }
             }
         }
+    }
 
     /**
      *
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 255d91b..b386978 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -218,6 +218,8 @@ object NCStopWordEnricher extends NCProbeEnricher {
 
     @throws[NCE]
     override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         def mark(stems: Set[String], f: Boolean): Unit =
             ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "stopWord" → f))
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
index fa9a3a2..f212687 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
@@ -51,11 +51,14 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher {
     }
 
     @throws[NCE]
-    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit =
+    override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
             ns.filter(t ⇒ mdl.suspWordsStems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "suspNoun" → true))
         }
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
index cc64123..7bbe5ac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala
@@ -102,6 +102,8 @@ object NCBaseNlpEnricher extends NCServerEnricher {
      */
     @throws[NCE]
     override def enrich(ns: NCNlpSentence, parent: Span = null) {
+        require(isStarted)
+
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
             // This must be 1st enricher in the pipeline.
             assume(ns.isEmpty)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
index 2543757..ac82456 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala
@@ -181,31 +181,33 @@ object NCCoordinatesEnricher extends NCServerEnricher {
       */
     private def hasStem(toks: Seq[NCNlpSentenceToken], stems: Seq[String]): Boolean = toks.exists(t ⇒ stems.contains(t.stem))
     
-    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
             val nums = NCNumericManager.find(ns).sortBy(_.tokens.head.index)
-            
+
             if (nums.size >= 2) {
                 val markers = mutable.Buffer.empty[Seq[NCNlpSentenceToken]]
-        
+
                 def areSuitableTokens(toks: Seq[NCNlpSentenceToken]): Boolean =
                     toks.forall(t ⇒ !t.isQuoted && !t.isBracketed) && !markers.exists(_.exists(t ⇒ toks.contains(t)))
-        
+
                 for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(toks) && MARKERS_STEMS.contains(toks.map(_.stem).mkString(" ")))
                     markers += toks
-                
+
                 val allMarkers = markers.flatten
                 val buf = mutable.Buffer.empty[NCNlpSentenceToken]
-                
+
                 for (pair ← nums.sliding(2) if !buf.exists(t ⇒ pair.flatMap(_.tokens).contains(t))) {
                     var lat = pair.head
                     var lon = pair.last
-                
+
                     val between = ns.slice(lat.tokens.last.index + 1, lon.tokens.head.index)
                     val before = getBefore(ns, ns.take(lat.tokens.head.index), markers)
-                
+
                     val after = getAfter(ns, ns.drop(lon.tokens.last.index + 1), markers)
-                
+
                     if (hasStem(before, lonStems) && hasStem(between, latStems) ||
                         hasStem(between, lonStems) && hasStem(after, latStems) ||
                         !inRange(lat, 90) && inRange(lat, 180)
@@ -216,25 +218,25 @@ object NCCoordinatesEnricher extends NCServerEnricher {
                     }
                     if (inRange(lat, 90) && inRange(lon, 180) && (markers.nonEmpty || similar2Coordinates(lat, lon))) {
                         val normBetween = between.diff(allMarkers)
-                        
+
                         if (normBetween.isEmpty ||
                             normBetween.forall(
                                 t ⇒ t.isEmpty || t.pos == "IN" || SEPS.contains(t.normText) || EQUALS.contains(t.normText))
                         ) {
                             val extra = (before ++ after ++ between).sortBy(_.index)
-                        
+
                             if (markers.exists(extra.containsSlice) || similar2Coordinates(lat, lon)) {
                                 val toks = (lat.tokens ++ lon.tokens ++ extra ++ markers.flatten).distinct.sortBy(_.index)
-                        
+
                                 val note = NCNlpSentenceNote(
                                     toks.map(_.index),
                                     "nlpcraft:coordinate",
                                     "latitude" → lat.value,
                                     "longitude" → lon.value
                                 )
-                        
+
                                 toks.foreach(_.add(note))
-                        
+
                                 buf ++= toks
                             }
                         }
@@ -242,4 +244,5 @@ object NCCoordinatesEnricher extends NCServerEnricher {
                 }
             }
         }
+    }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index eb002a5..2070ef9 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -174,6 +174,8 @@ object NCDateEnricher extends NCServerEnricher {
      */
     @throws[NCE]
     override def enrich(ns: Sentence, parent: Span = null) {
+        require(isStarted)
+
         // This stage must not be 1st enrichment stage.
         assume(ns.nonEmpty)
         
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
index 37bf87f..8162f92 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
@@ -140,7 +140,9 @@ object NCGeoEnricher extends NCServerEnricher {
      * @throws NCE
      */
     @throws[NCE]
-    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
             // This stage must not be 1st enrichment stage.
             assume(ns.nonEmpty)
@@ -294,6 +296,7 @@ object NCGeoEnricher extends NCServerEnricher {
 
             collapse(ns)
         }
+    }
 
     private def getValue(note: NCNlpSentenceNote, key: String): String = note(key).asInstanceOf[String]
     private def getValueOpt(note: NCNlpSentenceNote, key: String): Option[String] = note.get(key) match {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
index a884c32..18a25fe 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala
@@ -239,7 +239,9 @@ object NCNumericEnricher extends NCServerEnricher {
      * @throws NCE
      */
     @throws[NCE]
-    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit =
+    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
             val nums = NCNumericManager.find(ns)
     
@@ -438,4 +440,5 @@ object NCNumericEnricher extends NCServerEnricher {
             }
     
         }
+    }
 }
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
index d323785..532aa15 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala
@@ -56,7 +56,9 @@ object NCQuoteEnricher extends NCServerEnricher {
      * @throws NCE
      */
     @throws[NCE]
-    override def enrich(ns: NCNlpSentence, parent: Span = null) {
+    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+        require(isStarted)
+
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒
             // Clone input sentence.
             val copy = ns.clone()
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
index d8a1353..71dafc7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -531,7 +531,9 @@ object NCStopWordEnricher extends NCServerEnricher {
     }
 
     @throws[NCE]
-    override def enrich(ns: NCNlpSentence, parent: Span = null) {
+    override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = {
+        require(isStarted)
+
         // This stage must not be 1st enrichment stage.
         assume(ns.nonEmpty)