You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by se...@apache.org on 2020/05/20 07:28:26 UTC
[incubator-nlpcraft] branch NLPCRAFT-30 updated (ad958d1 -> 0b0c2ad)
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a change to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git.
from ad958d1 Renaming.
add a61c6c9 Utilities methods refactoring.
add a8f3625 Utilities methods refactoring.
new 364d05d Utilities methods refactoring. Last iteration.
new 0fc25c1 Merge branch 'NLPCRAFT-30-tmp' into NLPCRAFT-30
new 0b0c2ad Utilities methods refactoring. Last iteration.
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 633 ++++++++++++++++-
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 45 +-
.../apache/nlpcraft/model/impl/NCValueImpl.java | 50 --
.../nlpcraft/probe/mgrs/NCModelDecorator.scala | 71 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 22 +-
.../probe/mgrs/nlp/enrichers/NCEnricherUtils.scala | 779 ---------------------
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 27 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 8 +-
.../enrichers/relation/NCRelationEnricher.scala | 27 +-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 24 +-
10 files changed, 809 insertions(+), 877 deletions(-)
delete mode 100644 src/main/scala/org/apache/nlpcraft/model/impl/NCValueImpl.java
delete mode 100644 src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnricherUtils.scala
[incubator-nlpcraft] 03/03: Utilities methods refactoring. Last
iteration.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 0b0c2ad5c9d38cc4605a034450ada2c504ea10a8
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Wed May 20 10:28:18 2020 +0300
Utilities methods refactoring. Last iteration.
---
src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 6b7c828..db05f0d 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -481,7 +481,6 @@ class NCNlpSentence(
*/
def removeNote(note: NCNlpSentenceNote): Unit = this.foreach(_.remove(note))
- //noinspection HashCodeUsesVar
override def hashCode(): Int = {
if (hash == null)
hash = calcHash()
@@ -640,10 +639,10 @@ class NCNlpSentence(
}
/**
+ * Returns a flag indicating whether the two notes are equal (or similar). Differences caused by stopword tokens can be ignored.
*
- * @param n1
- * @param n2
- * @return
+ * @param n1 First note.
+ * @param n2 Second note.
*/
def notesEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
if (n1.noteType != n2.noteType)
[incubator-nlpcraft] 02/03: Merge branch 'NLPCRAFT-30-tmp' into
NLPCRAFT-30
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 0fc25c1adf6a90cae2c216ede72a73e01e4e1684
Merge: ad958d1 364d05d
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Wed May 20 10:24:29 2020 +0300
Merge branch 'NLPCRAFT-30-tmp' into NLPCRAFT-30
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 632 ++++++++++++++++-
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 45 +-
.../apache/nlpcraft/model/impl/NCValueImpl.java | 50 --
.../nlpcraft/probe/mgrs/NCModelDecorator.scala | 71 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 22 +-
.../probe/mgrs/nlp/enrichers/NCEnricherUtils.scala | 779 ---------------------
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 27 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 8 +-
.../enrichers/relation/NCRelationEnricher.scala | 27 +-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 24 +-
10 files changed, 809 insertions(+), 876 deletions(-)
[incubator-nlpcraft] 01/03: Utilities methods refactoring. Last
iteration.
Posted by se...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 364d05d2dbdf731998e951f089d621ab3fca87cc
Author: Sergey Kamov <se...@apache.org>
AuthorDate: Wed May 20 10:19:44 2020 +0300
Utilities methods refactoring. Last iteration.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 67 ++++++++-------
.../nlpcraft/probe/mgrs/NCModelDecorator.scala | 71 +++++++++++++++-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 4 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 1 -
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 5 +-
.../enrichers/relation/NCRelationEnricher.scala | 1 -
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 1 -
.../probe/mgrs/nlp/impl/NCVariantsCreator.scala | 96 ----------------------
8 files changed, 107 insertions(+), 139 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index e20295b..6b7c828 100644
--- a/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -53,18 +53,17 @@ object NCNlpSentence {
/**
* Example:
- *1. Sentence 'maximum x' (single element related function)
- *- maximum is aggregate function linked to date element.
- *- x defined as 2 elements: date and num.
+ * 1. Sentence 'maximum x' (single element related function)
+ * - maximum is aggregate function linked to date element.
+ * - x defined as 2 elements: date and num.
* So, the variant 'maximum x (as num)' should be excluded.
* *
- *2. Sentence 'compare x and y' (multiple elements related function)
- *- compare is relation function linked to date element.
- *- x an y defined as 2 elements: date and num.
+ * 2. Sentence 'compare x and y' (multiple elements related function)
+ * - compare is relation function linked to date element.
+ * - x an y defined as 2 elements: date and num.
* So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
* shouldn't be excluded, but invalid relation should be deleted for these combinations.
*/
-
types.size match {
case 0 ⇒ throw new AssertionError(s"Unexpected empty types [notesType=$notesType]")
case 1 ⇒ types.head == notesType
@@ -85,8 +84,8 @@ object NCNlpSentence {
* Note that 'idxsField' is 'indexes' and 'noteField' is 'note' for all kind of references.
*
* @param noteType Note type.
- * @param ns Sentence.
- * @param history Indexes transformation history.
+ * @param ns Sentence.
+ * @param history Indexes transformation history.
* @return Valid flag.
*/
private def fixIndexesReferences(noteType: String, ns: NCNlpSentence, history: Seq[(Int, Int)]): Boolean = {
@@ -114,10 +113,10 @@ object NCNlpSentence {
/**
* Copies token.
*
- * @param ns Sentence.
- * @param history Indexes transformation history.
+ * @param ns Sentence.
+ * @param history Indexes transformation history.
* @param toksCopy Copied tokens.
- * @param i Index.
+ * @param i Index.
*/
private def simpleCopy(
ns: NCNlpSentence,
@@ -134,9 +133,9 @@ object NCNlpSentence {
/**
* Glues stop words.
*
- * @param ns Sentence.
+ * @param ns Sentence.
* @param userNoteTypes Notes types.
- * @param history Indexes transformation history.
+ * @param history Indexes transformation history.
*/
private def unionStops(
ns: NCNlpSentence,
@@ -182,7 +181,7 @@ object NCNlpSentence {
/**
* Fixes indexes for all notes after recreating tokens.
*
- * @param ns Sentence.
+ * @param ns Sentence.
* @param userNoteTypes Notes types.
*/
private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]) {
@@ -205,10 +204,10 @@ object NCNlpSentence {
/**
* Zip notes with same type.
*
- * @param ns Sentence.
- * @param nType Notes type.
+ * @param ns Sentence.
+ * @param nType Notes type.
* @param userNotesTypes Notes types.
- * @param history Indexes transformation history.
+ * @param history Indexes transformation history.
*/
private def zipNotes(
ns: NCNlpSentence,
@@ -247,13 +246,13 @@ object NCNlpSentence {
/**
* Makes compound note.
*
- * @param ns Sentence.
+ * @param ns Sentence.
* @param nsCopyToks Tokens.
- * @param indexes Indexes.
- * @param stop Flag.
- * @param idx Index.
+ * @param indexes Indexes.
+ * @param stop Flag.
+ * @param idx Index.
* @param commonNote Common note.
- * @param history Indexes transformation history.
+ * @param history Indexes transformation history.
*/
private def mkCompound(
ns: NCNlpSentence,
@@ -339,11 +338,11 @@ object NCNlpSentence {
/**
* Fixes notes with references list to other notes indexes.
*
- * @param noteType Note type.
+ * @param noteType Note type.
* @param idxsField Indexes field.
* @param noteField Note field.
- * @param ns Sentence.
- * @param history Indexes transformation history.
+ * @param ns Sentence.
+ * @param history Indexes transformation history.
* @return Valid flag.
*/
private def fixIndexesReferencesList(
@@ -391,7 +390,7 @@ object NCNlpSentence {
/**
* Fixes tokens positions.
*
- * @param ns Sentence.
+ * @param ns Sentence.
* @param notNlpTypes Token types.
*/
private def collapseSentence(ns: NCNlpSentence, notNlpTypes: Seq[String]): Boolean = {
@@ -444,11 +443,11 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
* Parsed NLP sentence is a collection of tokens. Each token is a collection of notes and
* each note is a collection of KV pairs.
*
- * @param srvReqId Server request ID.
- * @param text Normalized text.
- * @param weight Weight.
+ * @param srvReqId Server request ID.
+ * @param text Normalized text.
+ * @param weight Weight.
* @param enabledBuiltInToks Enabled built-in tokens.
- * @param tokens Initial buffer.
+ * @param tokens Initial buffer.
*/
class NCNlpSentence(
val srvReqId: String,
@@ -704,9 +703,9 @@ class NCNlpSentence(
override def equals(obj: Any): Boolean = obj match {
case x: NCNlpSentence ⇒
tokens == x.tokens &&
- srvReqId == x.srvReqId &&
- text == x.text &&
- enabledBuiltInToks == x.enabledBuiltInToks
+ srvReqId == x.srvReqId &&
+ text == x.text &&
+ enabledBuiltInToks == x.enabledBuiltInToks
case _ ⇒ false
}
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCModelDecorator.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCModelDecorator.scala
index a4b28fd..2cfe417 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCModelDecorator.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCModelDecorator.scala
@@ -17,7 +17,17 @@
package org.apache.nlpcraft.probe.mgrs
-import org.apache.nlpcraft.model.{NCElement, NCModel}
+import java.io.Serializable
+import java.util
+
+import org.apache.nlpcraft.common.TOK_META_ALIASES_KEY
+import org.apache.nlpcraft.common.nlp.NCNlpSentence
+import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCVariantImpl}
+import org.apache.nlpcraft.model.{NCElement, NCModel, NCVariant}
+
+import scala.collection.JavaConverters._
+import scala.collection.{Seq, mutable}
+import scala.language.implicitConversions
/**
*
@@ -38,6 +48,65 @@ case class NCModelDecorator(
suspiciousWordsStems: Set[String],
elements: Map[String/*Element ID*/, NCElement]
) extends java.io.Serializable {
+ /**
+ * Makes variants for given sentences.
+ *
+ * @param srvReqId Server request ID.
+ * @param sens Sentences.
+ */
+ def makeVariants(srvReqId: String, sens: Seq[NCNlpSentence]): Seq[NCVariant] = {
+ val seq = sens.map(_.toSeq.map(nlpTok ⇒ NCTokenImpl(this, srvReqId, nlpTok) → nlpTok))
+ val toks = seq.map(_.map { case (tok, _) ⇒ tok })
+
+ case class Key(id: String, from: Int, to: Int)
+
+ val keys2Toks = toks.flatten.map(t ⇒ Key(t.getId, t.getStartCharIndex, t.getEndCharIndex) → t).toMap
+ val partsKeys = mutable.HashSet.empty[Key]
+
+ seq.flatten.foreach { case (tok, tokNlp) ⇒
+ if (tokNlp.isUser) {
+ val userNotes = tokNlp.filter(_.isUser)
+
+ require(userNotes.size == 1)
+
+ val optList: Option[util.List[util.HashMap[String, Serializable]]] = userNotes.head.dataOpt("parts")
+
+ optList match {
+ case Some(list) ⇒
+ val keys =
+ list.asScala.map(m ⇒
+ Key(
+ m.get("id").asInstanceOf[String],
+ m.get("startcharindex").asInstanceOf[Integer],
+ m.get("endcharindex").asInstanceOf[Integer]
+ )
+ )
+ val parts = keys.map(keys2Toks)
+
+ parts.zip(list.asScala).foreach { case (part, map) ⇒
+ map.get(TOK_META_ALIASES_KEY) match {
+ case null ⇒ // No-op.
+ case aliases ⇒ part.getMetadata.put(TOK_META_ALIASES_KEY, aliases.asInstanceOf[Object])
+ }
+ }
+
+ tok.setParts(parts)
+ partsKeys ++= keys
+
+ case None ⇒ // No-op.
+ }
+ }
+ }
+
+ // We can't collapse parts earlier, because we need them here (setParts method, few lines above.)
+ toks.filter(sen ⇒
+ !sen.exists(t ⇒
+ t.getId != "nlpcraft:nlp" &&
+ partsKeys.contains(Key(t.getId, t.getStartCharIndex, t.getEndCharIndex))
+ )
+ ).map(p ⇒ new NCVariantImpl(p.asJava))
+ }
+
override def toString: String = {
s"Probe model decorator [" +
s"id=${model.getId}, " +
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 693626a..7f653a0 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -46,7 +46,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.relation.NCRelationEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
-import org.apache.nlpcraft.probe.mgrs.nlp.impl.{NCVariantsCreator, _}
+import org.apache.nlpcraft.probe.mgrs.nlp.impl.{_}
import org.apache.nlpcraft.probe.mgrs.nlp.validate._
import scala.collection.JavaConverters._
@@ -475,7 +475,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
val meta = mutable.HashMap.empty[String, Any] ++ senMeta
val req = NCRequestImpl(meta, srvReqId)
- var senVars = NCVariantsCreator.makeVariants(mdlDec, srvReqId, sensSeq)
+ var senVars = mdlDec.makeVariants(srvReqId, sensSeq)
// Sentence variants can be filtered by model.
val fltSenVars: Seq[(NCVariant, Int)] =
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index fc826ea..42aaf18 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -27,7 +27,6 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
import org.apache.nlpcraft.common.{NCE, NCService}
import org.apache.nlpcraft.probe.mgrs.NCModelDecorator
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
-import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCVariantsCreator
import scala.collection.JavaConverters._
import scala.collection.{Map, Seq, mutable}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 255e575..6845150 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -25,7 +25,7 @@ import org.apache.nlpcraft.common._
import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, _}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
-import org.apache.nlpcraft.probe.mgrs.nlp.impl.{NCVariantsCreator, NCRequestImpl}
+import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
import org.apache.nlpcraft.probe.mgrs.{NCModelDecorator, NCSynonym}
import scala.collection.JavaConverters._
@@ -375,8 +375,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
found = false
if (collapsedSens == null)
- collapsedSens = NCVariantsCreator.
- makeVariants(mdl, ns.srvReqId, ns.clone().collapse()).map(_.asScala)
+ collapsedSens = mdl.makeVariants(ns.srvReqId, ns.clone().collapse()).map(_.asScala)
if (seq == null)
seq = convert(ns, collapsedSens, toks)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index 799e687..0318533 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -26,7 +26,6 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSe
import org.apache.nlpcraft.common.{NCE, NCService}
import org.apache.nlpcraft.probe.mgrs.NCModelDecorator
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
-import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCVariantsCreator
import scala.collection.JavaConverters._
import scala.collection.{Map, Seq, mutable}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index c8ebb35..484cb86 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -26,7 +26,6 @@ import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
import org.apache.nlpcraft.probe.mgrs.NCModelDecorator
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
-import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCVariantsCreator
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCVariantsCreator.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCVariantsCreator.scala
deleted file mode 100644
index 9cd7bad..0000000
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/impl/NCVariantsCreator.scala
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.probe.mgrs.nlp.impl
-
-import java.io.Serializable
-import java.util
-
-import org.apache.nlpcraft.common.TOK_META_ALIASES_KEY
-import org.apache.nlpcraft.common.nlp.NCNlpSentence
-import org.apache.nlpcraft.model.NCVariant
-import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCVariantImpl}
-import org.apache.nlpcraft.probe.mgrs.NCModelDecorator
-
-import scala.collection.JavaConverters._
-import scala.collection.{Seq, mutable}
-import scala.language.implicitConversions
-
-/**
- * Utility class prepare list of variants based on list of sentences.
- */
-private[nlp] object NCVariantsCreator {
- /**
- * Makes variants for given sentences.
- *
- * @param mdl Model.
- * @param srvReqId Server request ID.
- * @param sens Sentences.
- */
- def makeVariants(mdl: NCModelDecorator, srvReqId: String, sens: Seq[NCNlpSentence]): Seq[NCVariant] = {
- val seq = sens.map(_.toSeq.map(nlpTok ⇒ NCTokenImpl(mdl, srvReqId, nlpTok) → nlpTok))
- val toks = seq.map(_.map { case (tok, _) ⇒ tok })
-
- case class Key(id: String, from: Int, to: Int)
-
- val keys2Toks = toks.flatten.map(t ⇒ Key(t.getId, t.getStartCharIndex, t.getEndCharIndex) → t).toMap
- val partsKeys = mutable.HashSet.empty[Key]
-
- seq.flatten.foreach { case (tok, tokNlp) ⇒
- if (tokNlp.isUser) {
- val userNotes = tokNlp.filter(_.isUser)
-
- require(userNotes.size == 1)
-
- val optList: Option[util.List[util.HashMap[String, Serializable]]] = userNotes.head.dataOpt("parts")
-
- optList match {
- case Some(list) ⇒
- val keys =
- list.asScala.map(m ⇒
- Key(
- m.get("id").asInstanceOf[String],
- m.get("startcharindex").asInstanceOf[Integer],
- m.get("endcharindex").asInstanceOf[Integer]
- )
- )
- val parts = keys.map(keys2Toks)
-
- parts.zip(list.asScala).foreach { case (part, map) ⇒
- map.get(TOK_META_ALIASES_KEY) match {
- case null ⇒ // No-op.
- case aliases ⇒ part.getMetadata.put(TOK_META_ALIASES_KEY, aliases.asInstanceOf[Object])
- }
- }
-
- tok.setParts(parts)
- partsKeys ++= keys
-
- case None ⇒ // No-op.
- }
- }
- }
-
- // We can't collapse parts earlier, because we need them here (setParts method, few lines above.)
- toks.filter(sen ⇒
- !sen.exists(t ⇒
- t.getId != "nlpcraft:nlp" &&
- partsKeys.contains(Key(t.getId, t.getStartCharIndex, t.getEndCharIndex))
- )
- ).map(p ⇒ new NCVariantImpl(p.asJava))
- }
-}