Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/03/23 16:47:07 UTC

[incubator-nlpcraft] branch NLPCRAFT-15 updated (21b39b6 -> ee122d0)

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a change to branch NLPCRAFT-15
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.


    from 21b39b6  WIP.
     new bf98f98  WIP.
     new a2ed8ab  WIP.
     new ee122d0  WIP.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 24 ++++++++++++--
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    | 38 ++++++++++------------
 .../nlpcraft/common/nlp/NCNlpSentenceToken.scala   |  9 ++---
 .../dictionary/NCDictionaryEnricher.scala          | 19 ++++++-----
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  7 ++--
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 10 +++---
 .../enrichers/stopword/NCStopWordEnricher.scala    |  4 +--
 .../suspicious/NCSuspiciousNounsEnricher.scala     |  2 +-
 .../server/nlp/enrichers/date/NCDateEnricher.scala |  4 +--
 .../server/nlp/enrichers/geo/NCGeoEnricher.scala   |  2 +-
 .../enrichers/stopword/NCStopWordEnricher.scala    | 18 +++++-----
 11 files changed, 76 insertions(+), 61 deletions(-)


[incubator-nlpcraft] 03/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-15
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit ee122d03b5940329a05bddf0ef7895dff5e2f5ea
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon Mar 23 19:47:00 2020 +0300

    WIP.
---
 src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index 0dd2e67..7be0bee 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -202,5 +202,5 @@ object NCNlpSentenceToken {
     /**
      * To immutable iterator.
      */
-    implicit def notes(x: NCNlpSentenceToken): Iterable[NCNlpSentenceNote] = x.notes
+    implicit def notes(x: NCNlpSentenceToken): Iterable[NCNlpSentenceNote] = x.notes.toSet
 }
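
For context, the one-line change above makes the implicit conversion return an
immutable snapshot (.toSet) rather than exposing whatever mutable collection
backs x.notes ("To immutable iterator." in the doc comment). A minimal sketch of
the difference, with a plain buffer standing in for the real token and note
types (hypothetical names, not NLPCraft API):

    import scala.collection.mutable

    object SnapshotVsView extends App {
        // Stand-in for a token's internal, mutable note collection.
        val live = mutable.ArrayBuffer("noteA", "noteB")

        val view: Iterable[String] = live        // live view: later mutations show through
        val snapshot: Set[String]  = live.toSet  // immutable copy taken at call time

        live += "noteC"

        println(view.size)      // 3 -- the caller observes the mutation
        println(snapshot.size)  // 2 -- the caller keeps exactly what it was handed
    }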


[incubator-nlpcraft] 01/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-15
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit bf98f9889f02229ec92bd168fd9a94536a6a30a4
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon Mar 23 16:47:48 2020 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 23 +++++++++++++++++++---
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    | 19 +++++++++++++++---
 .../nlpcraft/common/nlp/NCNlpSentenceToken.scala   |  7 +------
 .../dictionary/NCDictionaryEnricher.scala          | 19 +++++++++---------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  7 +++++--
 .../nlp/enrichers/post/NCPostEnrichProcessor.scala | 10 +++++-----
 .../enrichers/stopword/NCStopWordEnricher.scala    |  4 ++--
 .../suspicious/NCSuspiciousNounsEnricher.scala     |  2 +-
 .../server/nlp/enrichers/date/NCDateEnricher.scala |  4 +---
 .../server/nlp/enrichers/geo/NCGeoEnricher.scala   |  2 +-
 .../enrichers/stopword/NCStopWordEnricher.scala    | 18 +++++++++--------
 11 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 9e3bcf9..1ec244e 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -38,8 +38,9 @@ class NCNlpSentence(
     val enabledBuiltInToks: Set[String],
     override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
 ) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
-    private lazy val hash =
-        Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+    private var hash: java.lang.Integer = _
+
+    private def calcHash(): Int = Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
 
     // Deep copy.
     override def clone(): NCNlpSentence = new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, tokens.map(_.clone()))
@@ -61,7 +62,23 @@ class NCNlpSentence(
       */
     def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
 
-    override def hashCode(): Int = hash
+    override def hashCode(): Int = {
+        if (hash == null)
+            hash = calcHash()
+
+        hash
+    }
+
+    def fixNote(note: NCNlpSentenceNote, kvs: (String, java.io.Serializable)*): Unit = {
+        val fixed = note.clone(kvs: _*)
+
+        this.filter(t ⇒ t.index >= fixed.tokenIndexes.head && t.index <= fixed.tokenIndexes.last).foreach(t ⇒ {
+            t.remove(note)
+            t.add(fixed)
+        })
+
+        hash = null
+    }
 
     override def equals(obj: Any): Boolean = obj match {
         case x: NCNlpSentence ⇒
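
The NCNlpSentence hunks above replace the eagerly memoised lazy val hash with a
resettable cache: hashCode() computes on demand, and the new fixNote helper swaps
a cloned note into every affected token and then clears the cache. A minimal
sketch of the same invalidate-on-write pattern, using a hypothetical container
rather than the real sentence class:

    import scala.collection.mutable

    // Hypothetical container illustrating the cached-hash pattern from the diff.
    class Bag(val items: mutable.ArrayBuffer[String]) {
        @transient
        private var hash: java.lang.Integer = _   // null means "stale, recompute"

        private def calcHash(): Int = items.hashCode()

        override def hashCode(): Int = {
            if (hash == null)
                hash = calcHash()                 // recompute lazily after invalidation
            hash
        }

        // All mutation is routed through the owner so the cache can be reset.
        def fixItem(idx: Int, value: String): Unit = {
            items(idx) = value
            hash = null                           // invalidate the cached hash
        }
    }

This also appears to be why the enricher call sites later in this commit switch
from mutating a token's note directly (getNlpNote += ..., markAsStop()) to calling
ns.fixNote(...): the sentence owns the cached hash and is the one place that can
reliably reset it.
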
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 91d6ebc..bb83a66 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -119,12 +119,25 @@ class NCNlpSentenceNote(
 
     /**
      *
-     * @param kv
+     * @param kvs
      */
-    def +=(kv : (String, java.io.Serializable)): Unit = {
+    def clone(kvs : (String, java.io.Serializable)*): NCNlpSentenceNote = {
+        val n = this.clone()
+
+        n.hash = null
+        kvs.foreach(kv ⇒ n.values += kv._1 → kv._2)
+
+        n
+    }
+
+    /**
+     *
+     * @param kvs
+     */
+    def +=(kvs : (String, java.io.Serializable)*): Unit = {
         hash = null
 
-        values += kv._1 → kv._2
+        kvs.foreach(kv ⇒ values += kv._1 → kv._2)
     }
 
     /**
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index 19d364f..0dd2e67 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -97,7 +97,7 @@ case class NCNlpSentenceToken(
       * because otherwise note cannot be found because its content changed and its hashCode changed too.
       * https://stackoverflow.com/questions/43553806/hashset-contains-returns-false-when-it-shouldnt/43554123
       */
-    def contains(note: NCNlpSentenceNote): Boolean = notes.toSet.contains(note)
+    def contains(note: NCNlpSentenceNote): Boolean = notes.contains(note)
 
     /**
       *
@@ -194,11 +194,6 @@ case class NCNlpSentenceToken(
       */
     def addStopReason(reason: NCNlpSentenceNote): Unit = stopsReasons += reason
 
-    /**
-      *
-      */
-    def markAsStop(): Unit = getNlpNote += "stopWord" → true
-
     override def toString: String =
         notes.toSeq.sortBy(t ⇒ (if (t.isNlp) 0 else 1, t.noteType)).mkString("NLP token [", "|", "]")
 }
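
The comment quoted in this hunk explains why contains previously rebuilt the set:
a note mutated after insertion changes its hashCode and can no longer be found by
a hash lookup (see the linked Stack Overflow question). With notes now replaced
through fixNote rather than mutated in place, the plain lookup is presumably safe
again, which is what this change restores. A stand-alone demonstration of the
underlying pitfall, with a hypothetical class in place of the real note type:

    import scala.collection.mutable

    object HashMutationPitfall extends App {
        // Hypothetical mutable "note": its hashCode depends on mutable state.
        final class Note(var kind: String) {
            override def hashCode(): Int = kind.hashCode
            override def equals(o: Any): Boolean = o match {
                case n: Note ⇒ n.kind == kind
                case _ ⇒ false
            }
        }

        val set  = mutable.HashSet.empty[Note]
        val note = new Note("stopWord")
        set += note

        println(set.contains(note))        // true -- stored under hash of "stopWord"

        note.kind = "suspNoun"             // in-place mutation changes the hash

        println(set.contains(note))        // false -- looked up in the wrong bucket
        println(set.toSet.contains(note))  // true  -- rebuilding re-hashes the element
    }
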
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
index 60d6fb7..b2c4137 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala
@@ -62,15 +62,16 @@ object NCDictionaryEnricher extends NCProbeEnricher {
             ns.foreach(t ⇒ {
                 // Dictionary.
                 val nlpNote = t.getNlpNote
-    
-                // Single letters seems suspiciously.
-                nlpNote += "dict" → (NCDictionaryManager.contains(t.lemma) && t.lemma.length > 1)
-    
-                // English.
-                nlpNote += "english" → t.origText.matches("""[\s\w\p{Punct}]+""")
-    
-                // Swearwords.
-                nlpNote += "swear" → swearWords.contains(t.stem)
+
+                ns.fixNote(
+                    nlpNote,
+                    // Single letters seems suspiciously.
+                    "dict" → (NCDictionaryManager.contains(t.lemma) && t.lemma.length > 1),
+                    // English.
+                    "english" → t.origText.matches("""[\s\w\p{Punct}]+"""),
+                    // Swearwords.
+                    "swear" → swearWords.contains(t.stem)
+                )
             })
         }
 }
\ No newline at end of file
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6998f6a..a3c08ce 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -170,6 +170,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
     /**
       *
+      * @param ns
       * @param elem
       * @param toks
       * @param direct
@@ -178,6 +179,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param parts
       */
     private def mark(
+        ns: NCNlpSentence,
         elem: NCElement,
         toks: Seq[NCNlpSentenceToken],
         direct: Boolean,
@@ -225,7 +227,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         toks.foreach(_.add(note))
 
         // For NLP elements.
-        toks.foreach(_.getNlpNote += "direct" → direct)
+        toks.foreach(t ⇒ ns.fixNote(t.getNlpNote, "direct" → direct))
     }
 
     /**
@@ -428,7 +430,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 val tokIdxs = m.tokens.map(_.index)
                 val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
 
-                mark(elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
+                mark(ns, elem = elm, toks = m.tokens, direct = direct, syn = Some(syn), metaOpt = None, parts = m.parts)
             }
 
             val parsers = mdl.model.getParsers
@@ -496,6 +498,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 
                                 if (!alreadyMarked(matchedToks, elemId))
                                     mark(
+                                        ns,
                                         elem = mdl.elements.getOrElse(elemId, throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
                                         toks = matchedToks,
                                         direct = true,
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
index 6764cd7..939a43a 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/post/NCPostEnrichProcessor.scala
@@ -207,7 +207,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             filter(_.isStopWord).
             flatten.
             filter(_.isNlp).
-            foreach(_ += "stopWord" → false)
+            foreach(n ⇒ ns.fixNote(n, "stopWord" → false))
 
         val nsNotes: Map[String, Seq[Int]] = ns.tokens.flatten.map(p ⇒ p.noteType → p.tokenIndexes).toMap
 
@@ -216,7 +216,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
             stopReason ← t.stopsReasons
             if nsNotes.getOrElse(stopReason.noteType, Seq.empty) == stopReason.tokenIndexes
         )
-            t.markAsStop()
+            ns.fixNote(t.getNlpNote, "stopWord" → true)
 
         val history = mutable.ArrayBuffer.empty[(Int, Int)]
 
@@ -318,7 +318,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                     fixed = fixed.distinct
 
                     if (idxs != fixed) {
-                        n += "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable]
+                        ns.fixNote(n, "indexes" → fixed.asJava.asInstanceOf[java.io.Serializable])
 
                         def x(seq: Seq[Int]): String = s"[${seq.mkString(", ")}]"
 
@@ -363,7 +363,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
                     if (fixed.forall(_.size == 1)) {
                         // Fix double dimension array to one dimension,
                         // so it should be called always inspite of fixIndexesReferences method.
-                        n += idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable]
+                        ns.fixNote(n, idxsField → fixed.map(_.head).asJava.asInstanceOf[java.io.Serializable])
 
                         def x(seq: Seq[Seq[Int]]): String = s"[${seq.map(p ⇒ s"[${p.mkString(",")}]").mkString(", ")}]"
 
@@ -520,7 +520,7 @@ object NCPostEnrichProcessor extends NCService with LazyLogging {
         }
 
         // Special case - field index of core NLP note.
-        ns.zipWithIndex.foreach { case (tok, idx) ⇒ tok.getNlpNote += "index" → idx }
+        ns.zipWithIndex.foreach { case (tok, idx) ⇒ ns.fixNote(tok.getNlpNote, "index" → idx) }
     }
 
     /**
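
One hunk above collapses a degenerate two-dimensional index list: the guard
fixed.forall(_.size == 1) ensures every inner sequence is a singleton before the
list is flattened with fixed.map(_.head) and written back through fixNote. A tiny
stand-alone illustration of that guard and flattening:

    object FlattenIndexes extends App {
        val fixed: Seq[Seq[Int]] = Seq(Seq(1), Seq(4), Seq(7))

        // Collapse only when every inner list holds exactly one index.
        if (fixed.forall(_.size == 1))
            println(fixed.map(_.head))   // List(1, 4, 7)
    }
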
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
index f2545d8..defb533 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -186,7 +186,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
                     POSES.contains(tok.pos) &&
                     ns(idx + 1).isStopWord
             ) {
-                tok.markAsStop()
+                ns.fixNote(tok.getNlpNote, "stopWord" → true)
 
                 stop = false
             }
@@ -201,7 +201,7 @@ object NCStopWordEnricher extends NCProbeEnricher {
     @throws[NCE]
     override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
         def mark(stems: Set[String], f: Boolean): Unit =
-            ns.filter(t ⇒ stems.contains(t.stem)).foreach(_.getNlpNote += "stopWord" → f)
+            ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "stopWord" → f))
 
         startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "modelId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
 
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
index 345df78..be05916 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala
@@ -45,6 +45,6 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher {
             "srvReqId" → ns.srvReqId,
             "modelId" → mdl.model.getId,
             "txt" → ns.text) { _ ⇒
-            ns.filter(t ⇒ mdl.suspiciousWordsStems.contains(t.stem)).foreach(_.getNlpNote += "suspNoun" → true)
+            ns.filter(t ⇒ mdl.suspiciousWordsStems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "suspNoun" → true))
         }
 }
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
index c0344fd..5908f1a 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala
@@ -461,9 +461,7 @@ object NCDateEnricher extends NCServerEnricher {
                     seq.foreach(n ⇒ {
                         val r = convertRange(mkDateRange(n))
 
-                        n += "from" → r.from
-                        n += "to" → r.to
-                        n += "periods" → new util.ArrayList[String]()
+                        ns.fixNote(n, "from" → r.from, "to" → r.to, "periods" → new util.ArrayList[String]())
                     })
 
                     def optHolder(b: Boolean) = if (b) Some(base) else None
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
index ab53510..dc93989 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala
@@ -180,7 +180,7 @@ object NCGeoEnricher extends NCServerEnricher {
 
                         // Other types(JJ etc) and quoted word are not re-marked.
                         toks.filter(t ⇒ !NCPennTreebank.NOUNS_POS.contains(t.pos) && t.pos != "FW").
-                            foreach(t ⇒ t.getNlpNote += "pos" → NCPennTreebank.SYNTH_POS)
+                            foreach(t ⇒ ns.fixNote(t.getNlpNote, "pos" → NCPennTreebank.SYNTH_POS))
                     }
 
                 LOCATIONS.get(toks.map(_.normText).mkString(" ")) match {
diff --git a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
index 9de302b..92168ba 100644
--- a/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala
@@ -389,7 +389,7 @@ object NCStopWordEnricher extends NCServerEnricher {
                 !isException(Seq(tok)) &&
                 stopPoses.contains(tok.pos) &&
                 ns(idx + 1).isStopWord) {
-            tok.markAsStop()
+            ns.fixNote(tok.getNlpNote, "stopWord" → true)
 
             stop = false
         }
@@ -466,9 +466,9 @@ object NCStopWordEnricher extends NCServerEnricher {
                             replace("nlpcraft:nlp")
 
                             // NLP note special case because has index field.
-                            newTok.getNlpNote += "index" → idx
-
                             ns += newTok
+
+                            ns.fixNote(newTok.getNlpNote, "index" → idx)
                         }
 
                         if (isRBR(tok) && !tok.isQuoted)
@@ -597,7 +597,7 @@ object NCStopWordEnricher extends NCServerEnricher {
                     // be, was, is etc. or have done etc.
                     isCommonVerbs("have", "do")
                 if (stop)
-                    tok.markAsStop()
+                    ns.fixNote(tok.getNlpNote, "stopWord" → true)
             }
             // +--------------------------------------+
             // | Pass #3.                             |
@@ -607,7 +607,7 @@ object NCStopWordEnricher extends NCServerEnricher {
             val mix = ns.tokenMixWithStopWords()
             
             for (toks ← mix if !buf.exists(_.containsSlice(toks)) && isStop(toks) && !isException(toks)) {
-                toks.foreach(_.markAsStop())
+                toks.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
                 buf += toks
             }
             
@@ -619,7 +619,7 @@ object NCStopWordEnricher extends NCServerEnricher {
             // | Check external possessive stop-word file.  |
             // +--------------------------------------------+
             for (tup ← origToks; key = tup._2 if POSSESSIVE_WORDS.contains(key) && !isException(tup._1))
-                tup._1.foreach(_.markAsStop())
+                tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
             
             // +--------------------------------------------------+
             // | Pass #5.                                         |
@@ -632,7 +632,7 @@ object NCStopWordEnricher extends NCServerEnricher {
             val startToks = ns.takeWhile(_.isStopWord) ++ ns.find(!_.isStopWord).map(p ⇒ p)
             for (startTok ← startToks; tup ← origToks.filter(_._1.head == startTok); key = tup._2
                 if FIRST_WORDS.contains(key) && !isException(tup._1)) {
-                tup._1.foreach(_.markAsStop())
+                tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
                 foundKeys += key
             }
     
@@ -642,7 +642,9 @@ object NCStopWordEnricher extends NCServerEnricher {
             // +-------------------------------------------------+
             for (tup ← origToks; key = tup._2 if !foundKeys.contains(key) && !isException(tup._1))
                 foundKeys.find(key.startsWith) match {
-                    case Some(s) ⇒ if (NOUN_WORDS.contains(key.substring(s.length).trim)) tup._1.foreach(_.markAsStop())
+                    case Some(s) ⇒
+                        if (NOUN_WORDS.contains(key.substring(s.length).trim))
+                            tup._1.foreach(tok ⇒ ns.fixNote(tok.getNlpNote, "stopWord" → true))
                     case None ⇒ ()
                 }
     


[incubator-nlpcraft] 02/03: WIP.

Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-15
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit a2ed8ab917470cb43a8a677db857d4c3a4eb2b79
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Mon Mar 23 19:38:49 2020 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala |  1 +
 .../nlpcraft/common/nlp/NCNlpSentenceNote.scala    | 45 ++++++++--------------
 2 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 1ec244e..dd517d9 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -38,6 +38,7 @@ class NCNlpSentence(
     val enabledBuiltInToks: Set[String],
     override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
 ) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
+    @transient
     private var hash: java.lang.Integer = _
 
     private def calcHash(): Int = Seq(srvReqId, text, enabledBuiltInToks, tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index bb83a66..d707000 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -28,13 +28,11 @@ import scala.language.implicitConversions
   * Sentence token note is a typed map of KV pairs.
   *
   */
-class NCNlpSentenceNote(
-    private val values: mutable.HashMap[String, java.io.Serializable] = mutable.HashMap[String, java.io.Serializable]()
-) extends java.io.Serializable with NCAsciiLike {
+class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable]) extends java.io.Serializable with NCAsciiLike {
     import NCNlpSentenceNote._
 
     @transient
-    private var hash: java.lang.Integer = _
+    private lazy val hash = values.hashCode()
 
     // Shortcuts for mandatory fields. (Immutable fields)
     lazy val noteType: String = values("noteType").asInstanceOf[String]
@@ -63,13 +61,7 @@ class NCNlpSentenceNote(
         case _ ⇒ false
     }
 
-    override def hashCode(): Int =
-        values.synchronized {
-            if (hash == null)
-                hash = values.hashCode()
-
-            hash
-        }
+    override def hashCode(): Int = hash
 
     /**
       * Clones this note.
@@ -82,7 +74,11 @@ class NCNlpSentenceNote(
             values.filter(p ⇒ !SKIP_CLONE.contains(p._1)).toSeq ++ params:_*
         )
 
-    override def clone(): NCNlpSentenceNote = new NCNlpSentenceNote(values.clone())
+    override def clone(): NCNlpSentenceNote = {
+        val m = mutable.Map.empty[String, java.io.Serializable] ++ values
+
+        new NCNlpSentenceNote(m.toMap)
+    }
 
     /**
       *
@@ -96,7 +92,7 @@ class NCNlpSentenceNote(
       * @return
       */
     def skipNlp(): Map[String, java.io.Serializable] =
-        values.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key != "noteType" }.toMap
+        values.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key != "noteType" }
 
     /**
       *
@@ -110,7 +106,7 @@ class NCNlpSentenceNote(
         else {
             val md = mutable.Map.empty[String, java.io.Serializable]
 
-            val m = if (noteType != "nlpcraft:nlp") skipNlp() else values.toMap
+            val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
 
             m.foreach { case (name, value) ⇒ md += (name.toLowerCase() → value)}
 
@@ -122,22 +118,11 @@ class NCNlpSentenceNote(
      * @param kvs
      */
     def clone(kvs : (String, java.io.Serializable)*): NCNlpSentenceNote = {
-        val n = this.clone()
-
-        n.hash = null
-        kvs.foreach(kv ⇒ n.values += kv._1 → kv._2)
+        val m = mutable.HashMap.empty[String, java.io.Serializable] ++ values
 
-        n
-    }
-
-    /**
-     *
-     * @param kvs
-     */
-    def +=(kvs : (String, java.io.Serializable)*): Unit = {
-        hash = null
+        kvs.foreach(kv ⇒ m += kv._1 → kv._2)
 
-        kvs.foreach(kv ⇒ values += kv._1 → kv._2)
+        new NCNlpSentenceNote(m.toMap)
     }
 
     /**
@@ -174,7 +159,7 @@ object NCNlpSentenceNote {
     /**
      * To immutable map.
      */
-    implicit def values(x: NCNlpSentenceNote): mutable.HashMap[String, java.io.Serializable] = x.values
+    implicit def values(note: NCNlpSentenceNote): Map[String, java.io.Serializable] = note.values
 
     /**
       * Creates new note with given parameters.
@@ -208,7 +193,7 @@ object NCNlpSentenceNote {
                ("wordLength" → len) :+
                ("sparsity" → sparsity) :+
                ("contiguous" → (sparsity == 0))
-            ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]): _*)
+            ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]): _*).toMap
         )
     }
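
Taken together, this commit makes the note effectively immutable: the backing map
becomes an immutable Map, the hash can therefore be a plain lazy val, and
"updating" a note means building a new instance via clone(kvs*). A minimal sketch
of that copy-on-write shape, with hypothetical names standing in for the real
class:

    // Hypothetical immutable note mirroring the shape introduced in this commit.
    final class MiniNote(private val values: Map[String, java.io.Serializable])
        extends java.io.Serializable {

        // Safe to memoise once: the backing map can never change.
        @transient
        private lazy val hash = values.hashCode()

        override def hashCode(): Int = hash

        override def equals(o: Any): Boolean = o match {
            case n: MiniNote ⇒ n.values == values
            case _ ⇒ false
        }

        // "Updating" builds a new note; existing instances stay valid in hash sets.
        def updated(kvs: (String, java.io.Serializable)*): MiniNote =
            new MiniNote(values ++ kvs)

        def apply(key: String): java.io.Serializable = values(key)
    }

    object MiniNoteDemo extends App {
        val n1 = new MiniNote(Map[String, java.io.Serializable](
            "noteType" → "nlpcraft:nlp", "stopWord" → false))
        val n2 = n1.updated("stopWord" → true)

        println(n1("stopWord"))   // false -- the original is untouched
        println(n2("stopWord"))   // true  -- the copy carries the change
    }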