You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nlpcraft.apache.org by ar...@apache.org on 2022/12/01 19:57:00 UTC
[incubator-nlpcraft] branch master updated: WIP
This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 11b123bc WIP
11b123bc is described below
commit 11b123bc8255eeb44b72464c4f438915f686799d
Author: Aaron Radzinski <ar...@datalingvo.com>
AuthorDate: Thu Dec 1 11:56:54 2022 -0800
WIP
---
.../scala/org/apache/nlpcraft/NCPipeline.scala | 50 ++++++++--------------
.../org/apache/nlpcraft/NCPipelineBuilder.scala | 13 +++---
.../internal/impl/NCModelPipelineManager.scala | 14 +++---
.../apache/nlpcraft/nlp/NCVariantFilterSpec.scala | 2 +-
.../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 4 +-
5 files changed, 36 insertions(+), 47 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
index c1c24428..bb3d0958 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala
@@ -20,47 +20,35 @@ package org.apache.nlpcraft
/**
* NLP processing pipeline for the input request. Pipeline is associated with the model.
*
- * An NLP pipeline is a container for various processing components that take the input text at the beginning of the
- * pipeline and produce the list of {@link NCEntity entities} at the end of the pipeline.
+ * An NLP pipeline is a container for the sequence of processing components that take the input text at the beginning
+ * of the pipeline and produce the list of [[NCVariant variants]] at the end of the pipeline.
* Schematically the pipeline looks like this:
* <pre>
- * +----------+ +-----------+
- * *=========* +---------+ +---+-------+ | +---+-------+ |
- * : Text : -> | Token | -> | Token | | -> | Token | | ----.
- * : Input : | Parser | | Enrichers |--+ | Validators |--+ \
- * *=========* +---------+ +-----------+ +------------+ \
- * }
- * +-----------+ +----------+ +--------+ /
- * *=========* +---+--------+ | +---+-------+ | +---+-----+ | /
- * : Entity : <- | Entity | | <- | Entity | | <- | Entity | | <-
- * : List : | Validators |--+ | Enrichers |--+ | Parsers |--+
- * *=========* +------------+ +-----------+ +---------+
+ * +----------+ +-----------+ +--------+
+ * *=========* +---------+ +---+-------+ | +---+-------+ | +---+-----+ |
+ * : Text : -> | Token | -> | Token | | -> | Token | | -> | Entity | | ----.
+ * : Input : | Parser | | Enrichers |--+ | Validators |--+ | Parsers |--+ \
+ * *=========* +---------+ +-----------+ +------------+ +---------+ \
+ * }
+ * +--------+ +--------+ +-----------+ +----------+ /
+ * *============* +---+-----+ | +---+-----+ | +---+--------+ | +---+-------+ | /
+ * : Variants : <- | Variant | | <- | Entity | | <- | Entity | | <- | Entity | | <-'
+ * : List : | Filters |--+ | Mappers |--+ | Validators |--+ | Enrichers |--+
+ * *============* +---------+ +---------+ +------------+ +-----------+
* </pre>
*
* Pipeline has the following components:
- * <ul>
- * <li>
- * {@link NCTokenParser} is responsible for taking the input text and tokenize it into a list of
- * {@link NCToken
- * }. This process is called tokenization, i.e. the process of demarcating and
- * classifying sections of a string of input characters. There's only one token parser for the pipeline.
- * </li>
- * <li>
- * After the initial list of token is
- * </li>
- * </ul>
*
*/
trait NCPipeline:
/**
- *
- * @return */
+ * Gets the token parser. One token parser is required for the pipeline.
+ */
def getTokenParser: NCTokenParser
/**
* Gets the list of entity parser. At least one entity parser is required.
- *
- * @return */
+ */
def getEntityParsers: List[NCEntityParser]
/**
@@ -86,11 +74,9 @@ trait NCPipeline:
/**
*
*/
- def getVariantFilter: Option[NCVariantFilter] = None
+ def getVariantFilters: List[NCVariantFilter] = List.empty
/**
- * Gets optional list of entity mappers.
- *
- * @return Optional list of entity mappers. Can be empty but never `null`.
+ * Gets optional list of entity mappers. Can return an empty list but never `null`.
*/
def getEntityMappers: List[NCEntityMapper] = List.empty
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
index f49049a3..d15ba80f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala
@@ -34,7 +34,7 @@ class NCPipelineBuilder:
private val tokVals: Buf[NCTokenValidator] = Buf.empty
private val entVals: Buf[NCEntityValidator] = Buf.empty
private val entMappers: Buf[NCEntityMapper] = Buf.empty
- private var varFilter: Option[NCVariantFilter] = None
+ private var varFilters: Buf[NCVariantFilter] = Buf.empty
/**
*
@@ -134,10 +134,10 @@ class NCPipelineBuilder:
this
/**
- * @param varFilter
+ * @param varFilters List of variant filters to append to this builder.
* @return This instance for call chaining. */
- def withVariantFilter(varFilter: NCVariantFilter): NCPipelineBuilder =
- this.varFilter = Some(varFilter)
+ def withVariantFilters(varFilters: List[NCVariantFilter]): NCPipelineBuilder =
+ this.varFilters ++= varFilters
this
/**
@@ -152,7 +152,8 @@ class NCPipelineBuilder:
/**
*
* @param entMappers
- * @return This instance for call chaining. */
+ * @return This instance for call chaining.
+ */
def withEntityMappers(entMappers: List[NCEntityMapper]): NCPipelineBuilder =
require(entMappers != null, "List of entity mappers cannot be null.")
entMappers.foreach((p: NCEntityMapper) => require(p != null, "Entity mapper cannot be null."))
@@ -234,5 +235,5 @@ class NCPipelineBuilder:
override def getEntityParsers: List[NCEntityParser] = entParsers.toList
override def getTokenValidators: List[NCTokenValidator] = tokVals.toList
override def getEntityValidators: List[NCEntityValidator] = entVals.toList
- override def getVariantFilter: Option[NCVariantFilter] = varFilter
+ override def getVariantFilters: List[NCVariantFilter] = varFilters.toList
override def getEntityMappers: List[NCEntityMapper] = entMappers.toList
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
index e97657c1..c2f56c74 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala
@@ -31,6 +31,8 @@ import java.util.concurrent.*
import java.util.concurrent.atomic.*
import java.util.function.Predicate
import scala.concurrent.ExecutionContext
+import scala.jdk.CollectionConverters.*
+
/**
*
@@ -54,10 +56,10 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
private val tokVals = nvl(pipeline.getTokenValidators)
private val entVals = nvl(pipeline.getEntityValidators)
private val entMappers = nvl(pipeline.getEntityMappers)
- private val varFilterOpt = pipeline.getVariantFilter
+ private val varFilters = nvl(pipeline.getVariantFilters)
private val allComps: Seq[NCLifecycle] =
- tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilterOpt.toSeq
+ tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilters
/**
* Processes pipeline components.
@@ -153,7 +155,6 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
map { case (_, ents) => if ents.sizeIs > 1 then ents.toSet else Set.empty }.filter(_.nonEmpty)
var variants: List[NCVariant] =
- import scala.jdk.CollectionConverters.*
if overlapEnts.nonEmpty then
NCModelPipelineHelper.
findCombinations(overlapEnts.map(_.asJava).asJava, pool).asScala.
@@ -164,15 +165,15 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
else
List(newVariant(entities))
- if varFilterOpt.isDefined then variants = varFilterOpt.get.filter(req, cfg, variants)
+ variants = varFilters.foldRight(variants)((filter, vars) => filter.filter(req, cfg, vars))
// Skips empty variants.
val vrns = variants.filter(_.getEntities.nonEmpty)
- for ((v, i) <- vrns.zipWithIndex)
+ for (v, i) <- vrns.zipWithIndex do
val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position", "Properties")
- for (e <- v.getEntities)
+ for e <- v.getEntities do
val toks = e.getTokens
tbl += (
e.getId,
@@ -185,6 +186,7 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L
NCPipelineData(req, vrns, toks)
def start(): Unit = processComponents(_.onStart(cfg), "started")
+
/**
*
*/
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
index 10254a13..983089ef 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
@@ -52,7 +52,7 @@ class NCVariantFilterSpec extends AnyFunSuite:
)
test0(
- mkPipeline(_.withVariantFilter((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty)),
+ mkPipeline(_.withVariantFilters(List((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty))),
false
)
}
\ No newline at end of file
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
index 6ca853c6..f596d580 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala
@@ -36,7 +36,7 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter
val tokVals: Buf[NCTokenValidator] = Buf.empty
val entVals: Buf[NCEntityValidator] = Buf.empty
val entMappers: Buf[NCEntityMapper] = Buf.empty
- var varFilter: Option[NCVariantFilter] = None
+ var varFilters: Buf[NCVariantFilter] = Buf.empty
override def getTokenParser: NCTokenParser = tokParser
override def getTokenEnrichers: List[NCTokenEnricher] = tokEnrichers.toList
@@ -45,4 +45,4 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter
override def getTokenValidators: List[NCTokenValidator] = tokVals.toList
override def getEntityValidators: List[NCEntityValidator] = entVals.toList
override def getEntityMappers: List[NCEntityMapper] = entMappers.toList
- override def getVariantFilter: Option[NCVariantFilter] = varFilter
\ No newline at end of file
+ override def getVariantFilters: List[NCVariantFilter] = varFilters.toList
\ No newline at end of file